forked from eden-emu/eden
		
	Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement Indexed samplers in general and specific code in GLSL
This commit is contained in:
		
						commit
						b5bbe7e752
					
				
					 24 changed files with 610 additions and 58 deletions
				
			
		|  | @ -29,6 +29,8 @@ add_library(video_core STATIC | ||||||
|     gpu_synch.h |     gpu_synch.h | ||||||
|     gpu_thread.cpp |     gpu_thread.cpp | ||||||
|     gpu_thread.h |     gpu_thread.h | ||||||
|  |     guest_driver.cpp | ||||||
|  |     guest_driver.h | ||||||
|     macro_interpreter.cpp |     macro_interpreter.cpp | ||||||
|     macro_interpreter.h |     macro_interpreter.h | ||||||
|     memory_manager.cpp |     memory_manager.cpp | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/shader_bytecode.h" | #include "video_core/engines/shader_bytecode.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
|  | #include "video_core/guest_driver.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
|  | @ -106,6 +107,9 @@ public: | ||||||
|     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|                                                     u64 offset) const = 0; |                                                     u64 offset) const = 0; | ||||||
|     virtual u32 GetBoundBuffer() const = 0; |     virtual u32 GetBoundBuffer() const = 0; | ||||||
|  | 
 | ||||||
|  |     virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | ||||||
|  |     virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Tegra::Engines
 | } // namespace Tegra::Engines
 | ||||||
|  |  | ||||||
|  | @ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { | ||||||
|  |     return rasterizer.AccessGuestDriverProfile(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { | ||||||
|  |     return rasterizer.AccessGuestDriverProfile(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void KeplerCompute::ProcessLaunch() { | void KeplerCompute::ProcessLaunch() { | ||||||
|     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||||||
|     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||||||
|  |  | ||||||
|  | @ -218,6 +218,10 @@ public: | ||||||
|         return regs.tex_cb_index; |         return regs.tex_cb_index; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||||||
|  | 
 | ||||||
|  |     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|  |  | ||||||
|  | @ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { | ||||||
|  |     return rasterizer.AccessGuestDriverProfile(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { | ||||||
|  |     return rasterizer.AccessGuestDriverProfile(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Engines
 | } // namespace Tegra::Engines
 | ||||||
|  |  | ||||||
|  | @ -1306,6 +1306,10 @@ public: | ||||||
|         return regs.tex_cb_index; |         return regs.tex_cb_index; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||||||
|  | 
 | ||||||
|  |     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||||||
|  | 
 | ||||||
|     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
 |     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
 | ||||||
|     /// we've seen used.
 |     /// we've seen used.
 | ||||||
|     using MacroMemory = std::array<u32, 0x40000>; |     using MacroMemory = std::array<u32, 0x40000>; | ||||||
|  |  | ||||||
							
								
								
									
										36
									
								
								src/video_core/guest_driver.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								src/video_core/guest_driver.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <algorithm> | ||||||
|  | #include <limits> | ||||||
|  | 
 | ||||||
|  | #include "video_core/guest_driver.h" | ||||||
|  | 
 | ||||||
|  | namespace VideoCore { | ||||||
|  | 
 | ||||||
|  | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { | ||||||
|  |     if (texture_handler_size_deduced) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const std::size_t size = bound_offsets.size(); | ||||||
|  |     if (size < 2) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); | ||||||
|  |     u32 min_val = std::numeric_limits<u32>::max(); | ||||||
|  |     for (std::size_t i = 1; i < size; ++i) { | ||||||
|  |         if (bound_offsets[i] == bound_offsets[i - 1]) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; | ||||||
|  |         min_val = std::min(min_val, new_min); | ||||||
|  |     } | ||||||
|  |     if (min_val > 2) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     texture_handler_size_deduced = true; | ||||||
|  |     texture_handler_size = min_texture_handler_size * min_val; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCore
 | ||||||
							
								
								
									
										41
									
								
								src/video_core/guest_driver.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								src/video_core/guest_driver.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,41 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace VideoCore { | ||||||
|  | 
 | ||||||
|  | /**
 | ||||||
|  |  * The GuestDriverProfile class is used to learn about the GPU driver's behavior and to collect | ||||||
|  |  * the information required by unavoidable HLE methods such as shader tracking, since those | ||||||
|  |  * are undecidable problems (Entscheidungsproblem). | ||||||
|  |  */ | ||||||
|  | class GuestDriverProfile { | ||||||
|  | public: | ||||||
|  |     void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | ||||||
|  | 
 | ||||||
|  |     u32 GetTextureHandlerSize() const { | ||||||
|  |         return texture_handler_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool TextureHandlerSizeKnown() const { | ||||||
|  |         return texture_handler_size_deduced; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     // Minimum size of texture handler any driver can use.
 | ||||||
|  |     static constexpr u32 min_texture_handler_size = 4; | ||||||
|  |     // Default size, matching the Vulkan and OpenGL standards. Nvidia GPUs can just as easily
 | ||||||
|  |     // use 4 bytes instead, so certain drivers may shrink the size.
 | ||||||
|  |     static constexpr u32 default_texture_handler_size = 8; | ||||||
|  | 
 | ||||||
|  |     u32 texture_handler_size = default_texture_handler_size; | ||||||
|  |     bool texture_handler_size_deduced = false; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCore
 | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/guest_driver.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
|  | @ -78,5 +79,18 @@ public: | ||||||
|     /// Initialize disk cached resources for the game being emulated
 |     /// Initialize disk cached resources for the game being emulated
 | ||||||
|     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | ||||||
|                                    const DiskResourceLoadCallback& callback = {}) {} |                                    const DiskResourceLoadCallback& callback = {}) {} | ||||||
|  | 
 | ||||||
|  |     /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
 | ||||||
|  |     GuestDriverProfile& AccessGuestDriverProfile() { | ||||||
|  |         return guest_driver_profile; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
 | ||||||
|  |     const GuestDriverProfile& AccessGuestDriverProfile() const { | ||||||
|  |         return guest_driver_profile; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     GuestDriverProfile guest_driver_profile{}; | ||||||
| }; | }; | ||||||
| } // namespace VideoCore
 | } // namespace VideoCore
 | ||||||
|  |  | ||||||
|  | @ -55,16 +55,20 @@ namespace { | ||||||
| 
 | 
 | ||||||
| template <typename Engine, typename Entry> | template <typename Engine, typename Entry> | ||||||
| Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | ||||||
|                                                Tegra::Engines::ShaderType shader_type) { |                                                Tegra::Engines::ShaderType shader_type, | ||||||
|  |                                                std::size_t index = 0) { | ||||||
|     if (entry.IsBindless()) { |     if (entry.IsBindless()) { | ||||||
|         const Tegra::Texture::TextureHandle tex_handle = |         const Tegra::Texture::TextureHandle tex_handle = | ||||||
|             engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); |             engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); | ||||||
|         return engine.GetTextureInfo(tex_handle); |         return engine.GetTextureInfo(tex_handle); | ||||||
|     } |     } | ||||||
|  |     const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||||||
|  |     const u32 offset = | ||||||
|  |         entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||||||
|     if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |     if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||||||
|         return engine.GetStageTexture(shader_type, entry.GetOffset()); |         return engine.GetStageTexture(shader_type, offset); | ||||||
|     } else { |     } else { | ||||||
|         return engine.GetTexture(entry.GetOffset()); |         return engine.GetTexture(offset); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | ||||||
|     u32 binding = device.GetBaseBindings(stage_index).sampler; |     u32 binding = device.GetBaseBindings(stage_index).sampler; | ||||||
|     for (const auto& entry : shader->GetShaderEntries().samplers) { |     for (const auto& entry : shader->GetShaderEntries().samplers) { | ||||||
|         const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |         const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | ||||||
|         const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); |         if (!entry.IsIndexed()) { | ||||||
|         SetupTexture(binding++, texture, entry); |             const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); | ||||||
|  |             SetupTexture(binding++, texture, entry); | ||||||
|  |         } else { | ||||||
|  |             for (std::size_t i = 0; i < entry.Size(); ++i) { | ||||||
|  |                 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | ||||||
|  |                 SetupTexture(binding++, texture, entry); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | ||||||
|     const auto& compute = system.GPU().KeplerCompute(); |     const auto& compute = system.GPU().KeplerCompute(); | ||||||
|     u32 binding = 0; |     u32 binding = 0; | ||||||
|     for (const auto& entry : kernel->GetShaderEntries().samplers) { |     for (const auto& entry : kernel->GetShaderEntries().samplers) { | ||||||
|         const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); |         if (!entry.IsIndexed()) { | ||||||
|         SetupTexture(binding++, texture, entry); |             const auto texture = | ||||||
|  |                 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); | ||||||
|  |             SetupTexture(binding++, texture, entry); | ||||||
|  |         } else { | ||||||
|  |             for (std::size_t i = 0; i < entry.Size(); ++i) { | ||||||
|  |                 const auto texture = | ||||||
|  |                     GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); | ||||||
|  |                 SetupTexture(binding++, texture, entry); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||||||
|  |     locker.SetBoundBuffer(usage.bound_buffer); | ||||||
|     for (const auto& key : usage.keys) { |     for (const auto& key : usage.keys) { | ||||||
|         const auto [buffer, offset] = key.first; |         const auto [buffer, offset] = key.first; | ||||||
|         locker.InsertKey(buffer, offset, key.second); |         locker.InsertKey(buffer, offset, key.second); | ||||||
|  | @ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { | ||||||
| 
 | 
 | ||||||
| ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||||||
|                                             const ConstBufferLocker& locker) const { |                                             const ConstBufferLocker& locker) const { | ||||||
|     return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), |     return ShaderDiskCacheUsage{unique_identifier,         variant, | ||||||
|  |                                 locker.GetBoundBuffer(),   locker.GetKeys(), | ||||||
|                                 locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; |                                 locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -391,6 +391,7 @@ public: | ||||||
|         DeclareVertex(); |         DeclareVertex(); | ||||||
|         DeclareGeometry(); |         DeclareGeometry(); | ||||||
|         DeclareRegisters(); |         DeclareRegisters(); | ||||||
|  |         DeclareCustomVariables(); | ||||||
|         DeclarePredicates(); |         DeclarePredicates(); | ||||||
|         DeclareLocalMemory(); |         DeclareLocalMemory(); | ||||||
|         DeclareInternalFlags(); |         DeclareInternalFlags(); | ||||||
|  | @ -503,6 +504,16 @@ private: | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     void DeclareCustomVariables() { | ||||||
|  |         const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||||||
|  |         for (u32 i = 0; i < num_custom_variables; ++i) { | ||||||
|  |             code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); | ||||||
|  |         } | ||||||
|  |         if (num_custom_variables > 0) { | ||||||
|  |             code.AddNewLine(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void DeclarePredicates() { |     void DeclarePredicates() { | ||||||
|         const auto& predicates = ir.GetPredicates(); |         const auto& predicates = ir.GetPredicates(); | ||||||
|         for (const auto pred : predicates) { |         for (const auto pred : predicates) { | ||||||
|  | @ -655,7 +666,8 @@ private: | ||||||
|         u32 binding = device.GetBaseBindings(stage).sampler; |         u32 binding = device.GetBaseBindings(stage).sampler; | ||||||
|         for (const auto& sampler : ir.GetSamplers()) { |         for (const auto& sampler : ir.GetSamplers()) { | ||||||
|             const std::string name = GetSampler(sampler); |             const std::string name = GetSampler(sampler); | ||||||
|             const std::string description = fmt::format("layout (binding = {}) uniform", binding++); |             const std::string description = fmt::format("layout (binding = {}) uniform", binding); | ||||||
|  |             binding += sampler.IsIndexed() ? sampler.Size() : 1; | ||||||
| 
 | 
 | ||||||
|             std::string sampler_type = [&]() { |             std::string sampler_type = [&]() { | ||||||
|                 if (sampler.IsBuffer()) { |                 if (sampler.IsBuffer()) { | ||||||
|  | @ -682,7 +694,11 @@ private: | ||||||
|                 sampler_type += "Shadow"; |                 sampler_type += "Shadow"; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             code.AddLine("{} {} {};", description, sampler_type, name); |             if (!sampler.IsIndexed()) { | ||||||
|  |                 code.AddLine("{} {} {};", description, sampler_type, name); | ||||||
|  |             } else { | ||||||
|  |                 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|         if (!ir.GetSamplers().empty()) { |         if (!ir.GetSamplers().empty()) { | ||||||
|             code.AddNewLine(); |             code.AddNewLine(); | ||||||
|  | @ -775,6 +791,11 @@ private: | ||||||
|             return {GetRegister(index), Type::Float}; |             return {GetRegister(index), Type::Float}; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||||||
|  |             const u32 index = cv->GetIndex(); | ||||||
|  |             return {GetCustomVariable(index), Type::Float}; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { |         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||||||
|             const u32 value = immediate->GetValue(); |             const u32 value = immediate->GetValue(); | ||||||
|             if (value < 10) { |             if (value < 10) { | ||||||
|  | @ -1098,7 +1119,11 @@ private: | ||||||
|         } else if (!meta->ptp.empty()) { |         } else if (!meta->ptp.empty()) { | ||||||
|             expr += "Offsets"; |             expr += "Offsets"; | ||||||
|         } |         } | ||||||
|         expr += '(' + GetSampler(meta->sampler) + ", "; |         if (!meta->sampler.IsIndexed()) { | ||||||
|  |             expr += '(' + GetSampler(meta->sampler) + ", "; | ||||||
|  |         } else { | ||||||
|  |             expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; | ||||||
|  |         } | ||||||
|         expr += coord_constructors.at(count + (has_array ? 1 : 0) + |         expr += coord_constructors.at(count + (has_array ? 1 : 0) + | ||||||
|                                       (has_shadow && !separate_dc ? 1 : 0) - 1); |                                       (has_shadow && !separate_dc ? 1 : 0) - 1); | ||||||
|         expr += '('; |         expr += '('; | ||||||
|  | @ -1310,6 +1335,8 @@ private: | ||||||
|             const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); |             const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | ||||||
|             target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), |             target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | ||||||
|                       Type::Uint}; |                       Type::Uint}; | ||||||
|  |         } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||||||
|  |             target = {GetCustomVariable(cv->GetIndex()), Type::Float}; | ||||||
|         } else { |         } else { | ||||||
|             UNREACHABLE_MSG("Assign called without a proper target"); |             UNREACHABLE_MSG("Assign called without a proper target"); | ||||||
|         } |         } | ||||||
|  | @ -2237,6 +2264,10 @@ private: | ||||||
|         return GetDeclarationWithSuffix(index, "gpr"); |         return GetDeclarationWithSuffix(index, "gpr"); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     std::string GetCustomVariable(u32 index) const { | ||||||
|  |         return GetDeclarationWithSuffix(index, "custom_var"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     std::string GetPredicate(Tegra::Shader::Pred pred) const { |     std::string GetPredicate(Tegra::Shader::Pred pred) const { | ||||||
|         return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); |         return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -53,7 +53,7 @@ struct BindlessSamplerKey { | ||||||
|     Tegra::Engines::SamplerDescriptor sampler{}; |     Tegra::Engines::SamplerDescriptor sampler{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| constexpr u32 NativeVersion = 11; | constexpr u32 NativeVersion = 12; | ||||||
| 
 | 
 | ||||||
| // Making sure sizes don't change by accident
 | // Making sure sizes don't change by accident
 | ||||||
| static_assert(sizeof(ProgramVariant) == 20); | static_assert(sizeof(ProgramVariant) == 20); | ||||||
|  | @ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | ||||||
|             u32 num_bound_samplers{}; |             u32 num_bound_samplers{}; | ||||||
|             u32 num_bindless_samplers{}; |             u32 num_bindless_samplers{}; | ||||||
|             if (file.ReadArray(&usage.unique_identifier, 1) != 1 || |             if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||||||
|                 file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || |                 file.ReadArray(&usage.variant, 1) != 1 || | ||||||
|  |                 file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||||||
|                 file.ReadArray(&num_bound_samplers, 1) != 1 || |                 file.ReadArray(&num_bound_samplers, 1) != 1 || | ||||||
|                 file.ReadArray(&num_bindless_samplers, 1) != 1) { |                 file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||||||
|                 LOG_ERROR(Render_OpenGL, error_loading); |                 LOG_ERROR(Render_OpenGL, error_loading); | ||||||
|  | @ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | ||||||
|         u32 num_bindless_samplers{}; |         u32 num_bindless_samplers{}; | ||||||
|         ShaderDiskCacheUsage usage; |         ShaderDiskCacheUsage usage; | ||||||
|         if (!LoadObjectFromPrecompiled(usage.unique_identifier) || |         if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | ||||||
|             !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || |             !LoadObjectFromPrecompiled(usage.variant) || | ||||||
|  |             !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||||||
|  |             !LoadObjectFromPrecompiled(num_keys) || | ||||||
|             !LoadObjectFromPrecompiled(num_bound_samplers) || |             !LoadObjectFromPrecompiled(num_bound_samplers) || | ||||||
|             !LoadObjectFromPrecompiled(num_bindless_samplers)) { |             !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||||||
|             return {}; |             return {}; | ||||||
|  | @ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | ||||||
| 
 | 
 | ||||||
|     if (file.WriteObject(TransferableEntryKind::Usage) != 1 || |     if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||||||
|         file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || |         file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||||||
|  |         file.WriteObject(usage.bound_buffer) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || |         file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || |         file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { |         file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||||||
|  | @ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     if (!SaveObjectToPrecompiled(usage.unique_identifier) || |     if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||||||
|         !SaveObjectToPrecompiled(usage.variant) || |         !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || |         !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || |         !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { |         !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||||||
|  |  | ||||||
|  | @ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||||||
| struct ShaderDiskCacheUsage { | struct ShaderDiskCacheUsage { | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     ProgramVariant variant; |     ProgramVariant variant; | ||||||
|  |     u32 bound_buffer{}; | ||||||
|     VideoCommon::Shader::KeyMap keys; |     VideoCommon::Shader::KeyMap keys; | ||||||
|     VideoCommon::Shader::BoundSamplerMap bound_samplers; |     VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||||||
|     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||||||
|  |  | ||||||
|  | @ -353,6 +353,7 @@ private: | ||||||
|         DeclareFragment(); |         DeclareFragment(); | ||||||
|         DeclareCompute(); |         DeclareCompute(); | ||||||
|         DeclareRegisters(); |         DeclareRegisters(); | ||||||
|  |         DeclareCustomVariables(); | ||||||
|         DeclarePredicates(); |         DeclarePredicates(); | ||||||
|         DeclareLocalMemory(); |         DeclareLocalMemory(); | ||||||
|         DeclareSharedMemory(); |         DeclareSharedMemory(); | ||||||
|  | @ -586,6 +587,15 @@ private: | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     void DeclareCustomVariables() { | ||||||
|  |         const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||||||
|  |         for (u32 i = 0; i < num_custom_variables; ++i) { | ||||||
|  |             const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||||||
|  |             Name(id, fmt::format("custom_var_{}", i)); | ||||||
|  |             custom_variables.emplace(i, AddGlobalVariable(id)); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void DeclarePredicates() { |     void DeclarePredicates() { | ||||||
|         for (const auto pred : ir.GetPredicates()) { |         for (const auto pred : ir.GetPredicates()) { | ||||||
|             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); |             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||||||
|  | @ -982,6 +992,11 @@ private: | ||||||
|             return {OpLoad(t_float, registers.at(index)), Type::Float}; |             return {OpLoad(t_float, registers.at(index)), Type::Float}; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||||||
|  |             const u32 index = cv->GetIndex(); | ||||||
|  |             return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { |         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||||||
|             return {Constant(t_uint, immediate->GetValue()), Type::Uint}; |             return {Constant(t_uint, immediate->GetValue()), Type::Uint}; | ||||||
|         } |         } | ||||||
|  | @ -1333,6 +1348,9 @@ private: | ||||||
|         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||||
|             target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; |             target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | ||||||
| 
 | 
 | ||||||
|  |         } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||||||
|  |             target = {custom_variables.at(cv->GetIndex()), Type::Float}; | ||||||
|  | 
 | ||||||
|         } else { |         } else { | ||||||
|             UNIMPLEMENTED(); |             UNIMPLEMENTED(); | ||||||
|         } |         } | ||||||
|  | @ -2508,6 +2526,7 @@ private: | ||||||
|     Id out_vertex{}; |     Id out_vertex{}; | ||||||
|     Id in_vertex{}; |     Id in_vertex{}; | ||||||
|     std::map<u32, Id> registers; |     std::map<u32, Id> registers; | ||||||
|  |     std::map<u32, Id> custom_variables; | ||||||
|     std::map<Tegra::Shader::Pred, Id> predicates; |     std::map<Tegra::Shader::Pred, Id> predicates; | ||||||
|     std::map<u32, Id> flow_variables; |     std::map<u32, Id> flow_variables; | ||||||
|     Id local_memory{}; |     Id local_memory{}; | ||||||
|  |  | ||||||
|  | @ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle | ||||||
|     return value; |     return value; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||||||
|  |     if (bound_buffer_saved) { | ||||||
|  |         return bound_buffer; | ||||||
|  |     } | ||||||
|  |     if (!engine) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     bound_buffer_saved = true; | ||||||
|  |     bound_buffer = engine->GetBoundBuffer(); | ||||||
|  |     return bound_buffer; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||||||
|     keys.insert_or_assign({buffer, offset}, value); |     keys.insert_or_assign({buffer, offset}, value); | ||||||
| } | } | ||||||
|  | @ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes | ||||||
|     bindless_samplers.insert_or_assign({buffer, offset}, sampler); |     bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||||||
|  |     bound_buffer_saved = true; | ||||||
|  |     bound_buffer = buffer; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool ConstBufferLocker::IsConsistent() const { | bool ConstBufferLocker::IsConsistent() const { | ||||||
|     if (!engine) { |     if (!engine) { | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
|  | #include "video_core/guest_driver.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
| 
 | 
 | ||||||
|  | @ -40,6 +41,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||||||
| 
 | 
 | ||||||
|  |     std::optional<u32> ObtainBoundBuffer(); | ||||||
|  | 
 | ||||||
|     /// Inserts a key.
 |     /// Inserts a key.
 | ||||||
|     void InsertKey(u32 buffer, u32 offset, u32 value); |     void InsertKey(u32 buffer, u32 offset, u32 value); | ||||||
| 
 | 
 | ||||||
|  | @ -49,6 +52,9 @@ public: | ||||||
|     /// Inserts a bindless sampler key.
 |     /// Inserts a bindless sampler key.
 | ||||||
|     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||||||
| 
 | 
 | ||||||
|  |     /// Set the bound buffer for this locker.
 | ||||||
|  |     void SetBoundBuffer(u32 buffer); | ||||||
|  | 
 | ||||||
|     /// Checks keys and samplers against engine's current const buffers. Returns true if they are
 |     /// Checks keys and samplers against engine's current const buffers. Returns true if they are
 | ||||||
|     /// the same value, false otherwise;
 |     /// the same value, false otherwise;
 | ||||||
|     bool IsConsistent() const; |     bool IsConsistent() const; | ||||||
|  | @ -71,12 +77,27 @@ public: | ||||||
|         return bindless_samplers; |         return bindless_samplers; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /// Gets bound buffer used on this shader
 | ||||||
|  |     u32 GetBoundBuffer() const { | ||||||
|  |         return bound_buffer; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Obtains access to the guest driver's profile.
 | ||||||
|  |     VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||||||
|  |         if (engine) { | ||||||
|  |             return &engine->AccessGuestDriverProfile(); | ||||||
|  |         } | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     const Tegra::Engines::ShaderType stage; |     const Tegra::Engines::ShaderType stage; | ||||||
|     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||||||
|     KeyMap keys; |     KeyMap keys; | ||||||
|     BoundSamplerMap bound_samplers; |     BoundSamplerMap bound_samplers; | ||||||
|     BindlessSamplerMap bindless_samplers; |     BindlessSamplerMap bindless_samplers; | ||||||
|  |     bool bound_buffer_saved{}; | ||||||
|  |     u32 bound_buffer{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon::Shader
 | } // namespace VideoCommon::Shader
 | ||||||
|  |  | ||||||
|  | @ -3,6 +3,7 @@ | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include <limits> | ||||||
| #include <set> | #include <set> | ||||||
| 
 | 
 | ||||||
| #include <fmt/format.h> | #include <fmt/format.h> | ||||||
|  | @ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||||||
|     return (absolute_offset % SchedPeriod) == 0; |     return (absolute_offset % SchedPeriod) == 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | ||||||
|  |                               const std::list<Sampler>& used_samplers) { | ||||||
|  |     if (gpu_driver == nullptr) { | ||||||
|  |         LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     u32 count{}; | ||||||
|  |     std::vector<u32> bound_offsets; | ||||||
|  |     for (const auto& sampler : used_samplers) { | ||||||
|  |         if (sampler.IsBindless()) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         ++count; | ||||||
|  |         bound_offsets.emplace_back(sampler.GetOffset()); | ||||||
|  |     } | ||||||
|  |     if (count > 1) { | ||||||
|  |         gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | ||||||
|  |                                         VideoCore::GuestDriverProfile* gpu_driver, | ||||||
|  |                                         const std::list<Sampler>& used_samplers) { | ||||||
|  |     if (gpu_driver == nullptr) { | ||||||
|  |         LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     const u32 base_offset = sampler_to_deduce.GetOffset(); | ||||||
|  |     u32 max_offset{std::numeric_limits<u32>::max()}; | ||||||
|  |     for (const auto& sampler : used_samplers) { | ||||||
|  |         if (sampler.IsBindless()) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         if (sampler.GetOffset() > base_offset) { | ||||||
|  |             max_offset = std::min(sampler.GetOffset(), max_offset); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (max_offset == std::numeric_limits<u32>::max()) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| class ASTDecoder { | class ASTDecoder { | ||||||
|  | @ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||||||
|     return pc + 1; |     return pc + 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void ShaderIR::PostDecode() { | ||||||
|  |     // Deduce texture handler size if needed
 | ||||||
|  |     auto gpu_driver = locker.AccessGuestDriverProfile(); | ||||||
|  |     DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||||||
|  |     // Deduce Indexed Samplers
 | ||||||
|  |     if (!uses_indexed_samplers) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     for (auto& sampler : used_samplers) { | ||||||
|  |         if (!sampler.IsIndexed()) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||||||
|  |             sampler.SetSize(*size); | ||||||
|  |         } else { | ||||||
|  |             LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||||||
|  |             sampler.SetSize(1); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace VideoCommon::Shader
 | } // namespace VideoCommon::Shader
 | ||||||
|  |  | ||||||
|  | @ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|         Node4 values; |         Node4 values; | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |         for (u32 element = 0; element < values.size(); ++element) { | ||||||
|             auto coords_copy = coords; |             auto coords_copy = coords; | ||||||
|             MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; |             MetaTexture meta{sampler, {}, depth_compare, aoffi,   {}, {}, | ||||||
|  |                              {},      {}, component,     element, {}}; | ||||||
|             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|         const auto derivate_reg = instr.gpr20.Value(); |         const auto derivate_reg = instr.gpr20.Value(); | ||||||
|         const auto texture_type = instr.txd.texture_type.Value(); |         const auto texture_type = instr.txd.texture_type.Value(); | ||||||
|         const auto coord_count = GetCoordCount(texture_type); |         const auto coord_count = GetCoordCount(texture_type); | ||||||
| 
 |         Node index_var{}; | ||||||
|         const Sampler* sampler = |         const Sampler* sampler = | ||||||
|             is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) |             is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) | ||||||
|                         : GetSampler(instr.sampler, {{texture_type, is_array, false}}); |                         : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | ||||||
|         Node4 values; |         Node4 values; | ||||||
|         if (sampler == nullptr) { |         if (sampler == nullptr) { | ||||||
|  | @ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |         for (u32 element = 0; element < values.size(); ++element) { | ||||||
|             MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; |             MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates, | ||||||
|  |                              {},       {},         {}, element, index_var}; | ||||||
|             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); |             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|         // TODO: The new commits on the texture refactor, change the way samplers work.
 |         // TODO: The new commits on the texture refactor, change the way samplers work.
 | ||||||
|         // Sadly, not all texture instructions specify the type of texture their sampler
 |         // Sadly, not all texture instructions specify the type of texture their sampler
 | ||||||
|         // uses. This must be fixed at a later instance.
 |         // uses. This must be fixed at a later instance.
 | ||||||
|  |         Node index_var{}; | ||||||
|         const Sampler* sampler = |         const Sampler* sampler = | ||||||
|             is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); |             is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); | ||||||
| 
 | 
 | ||||||
|         if (sampler == nullptr) { |         if (sampler == nullptr) { | ||||||
|             u32 indexer = 0; |             u32 indexer = 0; | ||||||
|  | @ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|                 if (!instr.txq.IsComponentEnabled(element)) { |                 if (!instr.txq.IsComponentEnabled(element)) { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|                 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; |                 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||||||
|                 const Node value = |                 const Node value = | ||||||
|                     Operation(OperationCode::TextureQueryDimensions, meta, |                     Operation(OperationCode::TextureQueryDimensions, meta, | ||||||
|                               GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |                               GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | ||||||
|  | @ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
| 
 | 
 | ||||||
|         auto texture_type = instr.tmml.texture_type.Value(); |         auto texture_type = instr.tmml.texture_type.Value(); | ||||||
|         const bool is_array = instr.tmml.array != 0; |         const bool is_array = instr.tmml.array != 0; | ||||||
|  |         Node index_var{}; | ||||||
|         const Sampler* sampler = |         const Sampler* sampler = | ||||||
|             is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); |             is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); | ||||||
| 
 | 
 | ||||||
|         if (sampler == nullptr) { |         if (sampler == nullptr) { | ||||||
|             u32 indexer = 0; |             u32 indexer = 0; | ||||||
|  | @ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             auto params = coords; |             auto params = coords; | ||||||
|             MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; |             MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||||||
|             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||||||
|             SetTemporary(bb, indexer++, value); |             SetTemporary(bb, indexer++, value); | ||||||
|         } |         } | ||||||
|  | @ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | ||||||
|     // Otherwise create a new mapping for this sampler
 |     // Otherwise create a new mapping for this sampler
 | ||||||
|     const auto next_index = static_cast<u32>(used_samplers.size()); |     const auto next_index = static_cast<u32>(used_samplers.size()); | ||||||
|     return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, |     return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, | ||||||
|                                        info.is_buffer); |                                        info.is_buffer, false); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, | ||||||
|                                             std::optional<SamplerInfo> sampler_info) { |                                             std::optional<SamplerInfo> sampler_info) { | ||||||
|     const Node sampler_register = GetRegister(reg); |     const Node sampler_register = GetRegister(reg); | ||||||
|     const auto [base_sampler, buffer, offset] = |     const auto [base_node, tracked_sampler_info] = | ||||||
|         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||||||
|     ASSERT(base_sampler != nullptr); |     ASSERT(base_node != nullptr); | ||||||
|     if (base_sampler == nullptr) { |     if (base_node == nullptr) { | ||||||
|         return nullptr; |         return nullptr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto info = GetSamplerInfo(sampler_info, offset, buffer); |     if (const auto bindless_sampler_info = | ||||||
|  |             std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||||||
|  |         const u32 buffer = bindless_sampler_info->GetIndex(); | ||||||
|  |         const u32 offset = bindless_sampler_info->GetOffset(); | ||||||
|  |         const auto info = GetSamplerInfo(sampler_info, offset, buffer); | ||||||
| 
 | 
 | ||||||
|     // If this sampler has already been used, return the existing mapping.
 |         // If this sampler has already been used, return the existing mapping.
 | ||||||
|     const auto it = |         const auto it = | ||||||
|         std::find_if(used_samplers.begin(), used_samplers.end(), |             std::find_if(used_samplers.begin(), used_samplers.end(), | ||||||
|                      [buffer = buffer, offset = offset](const Sampler& entry) { |                          [buffer = buffer, offset = offset](const Sampler& entry) { | ||||||
|                          return entry.GetBuffer() == buffer && entry.GetOffset() == offset; |                              return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | ||||||
|                      }); |                          }); | ||||||
|     if (it != used_samplers.end()) { |         if (it != used_samplers.end()) { | ||||||
|         ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && |             ASSERT(it->IsBindless() && it->GetType() == info.type && | ||||||
|                it->IsShadow() == info.is_shadow); |                    it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); | ||||||
|         return &*it; |             return &*it; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Otherwise create a new mapping for this sampler
 | ||||||
|  |         const auto next_index = static_cast<u32>(used_samplers.size()); | ||||||
|  |         return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, | ||||||
|  |                                            info.is_shadow, info.is_buffer, false); | ||||||
|  |     } else if (const auto array_sampler_info = | ||||||
|  |                    std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | ||||||
|  |         const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; | ||||||
|  |         index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); | ||||||
|  |         const auto info = GetSamplerInfo(sampler_info, base_offset); | ||||||
|  | 
 | ||||||
|  |         // If this sampler has already been used, return the existing mapping.
 | ||||||
|  |         const auto it = std::find_if( | ||||||
|  |             used_samplers.begin(), used_samplers.end(), | ||||||
|  |             [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); | ||||||
|  |         if (it != used_samplers.end()) { | ||||||
|  |             ASSERT(!it->IsBindless() && it->GetType() == info.type && | ||||||
|  |                    it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && | ||||||
|  |                    it->IsBuffer() == info.is_buffer && it->IsIndexed()); | ||||||
|  |             return &*it; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         uses_indexed_samplers = true; | ||||||
|  |         // Otherwise create a new mapping for this sampler
 | ||||||
|  |         const auto next_index = static_cast<u32>(used_samplers.size()); | ||||||
|  |         return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, | ||||||
|  |                                            info.is_shadow, info.is_buffer, true); | ||||||
|     } |     } | ||||||
| 
 |     return nullptr; | ||||||
|     // Otherwise create a new mapping for this sampler
 |  | ||||||
|     const auto next_index = static_cast<u32>(used_samplers.size()); |  | ||||||
|     return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, |  | ||||||
|                                        info.is_shadow, info.is_buffer); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||||||
|  | @ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||||
|                          "This method is not supported."); |                          "This method is not supported."); | ||||||
| 
 | 
 | ||||||
|     const SamplerInfo info{texture_type, is_array, is_shadow, false}; |     const SamplerInfo info{texture_type, is_array, is_shadow, false}; | ||||||
|     const Sampler* sampler = |     Node index_var{}; | ||||||
|         is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); |     const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) | ||||||
|  |                                          : GetSampler(instr.sampler, info); | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     if (sampler == nullptr) { |     if (sampler == nullptr) { | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |         for (u32 element = 0; element < values.size(); ++element) { | ||||||
|  | @ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||||
| 
 | 
 | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto copy_coords = coords; |         auto copy_coords = coords; | ||||||
|         MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; |         MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias, | ||||||
|  |                          lod,      {},    element,       index_var}; | ||||||
|         values[element] = Operation(read_method, meta, std::move(copy_coords)); |         values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | ||||||
|     u64 parameter_register = instr.gpr20.Value(); |     u64 parameter_register = instr.gpr20.Value(); | ||||||
| 
 | 
 | ||||||
|     const SamplerInfo info{texture_type, is_array, depth_compare, false}; |     const SamplerInfo info{texture_type, is_array, depth_compare, false}; | ||||||
|     const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) |     Node index_var{}; | ||||||
|  |     const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) | ||||||
|                                          : GetSampler(instr.sampler, info); |                                          : GetSampler(instr.sampler, info); | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     if (sampler == nullptr) { |     if (sampler == nullptr) { | ||||||
|  | @ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{ |         MetaTexture meta{ | ||||||
|             *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; |             *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, | ||||||
|  |             index_var}; | ||||||
|         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; |         MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; |         MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
|     return values; |     return values; | ||||||
|  |  | ||||||
|  | @ -212,6 +212,7 @@ enum class MetaStackClass { | ||||||
| class OperationNode; | class OperationNode; | ||||||
| class ConditionalNode; | class ConditionalNode; | ||||||
| class GprNode; | class GprNode; | ||||||
|  | class CustomVarNode; | ||||||
| class ImmediateNode; | class ImmediateNode; | ||||||
| class InternalFlagNode; | class InternalFlagNode; | ||||||
| class PredicateNode; | class PredicateNode; | ||||||
|  | @ -223,26 +224,32 @@ class SmemNode; | ||||||
| class GmemNode; | class GmemNode; | ||||||
| class CommentNode; | class CommentNode; | ||||||
| 
 | 
 | ||||||
| using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, | ||||||
|                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, |                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | ||||||
|                               LmemNode, SmemNode, GmemNode, CommentNode>; |                               LmemNode, SmemNode, GmemNode, CommentNode>; | ||||||
| using Node = std::shared_ptr<NodeData>; | using Node = std::shared_ptr<NodeData>; | ||||||
| using Node4 = std::array<Node, 4>; | using Node4 = std::array<Node, 4>; | ||||||
| using NodeBlock = std::vector<Node>; | using NodeBlock = std::vector<Node>; | ||||||
| 
 | 
 | ||||||
|  | class BindlessSamplerNode; | ||||||
|  | class ArraySamplerNode; | ||||||
|  | 
 | ||||||
|  | using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; | ||||||
|  | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||||||
|  | 
 | ||||||
| class Sampler { | class Sampler { | ||||||
| public: | public: | ||||||
|     /// This constructor is for bound samplers
 |     /// This constructor is for bound samplers
 | ||||||
|     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, |     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, | ||||||
|                                bool is_array, bool is_shadow, bool is_buffer) |                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) | ||||||
|         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, | ||||||
|           is_buffer{is_buffer} {} |           is_buffer{is_buffer}, is_indexed{is_indexed} {} | ||||||
| 
 | 
 | ||||||
|     /// This constructor is for bindless samplers
 |     /// This constructor is for bindless samplers
 | ||||||
|     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, |     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, | ||||||
|                                bool is_array, bool is_shadow, bool is_buffer) |                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) | ||||||
|         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, |         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, | ||||||
|           is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} |           is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} | ||||||
| 
 | 
 | ||||||
|     constexpr u32 GetIndex() const { |     constexpr u32 GetIndex() const { | ||||||
|         return index; |         return index; | ||||||
|  | @ -276,16 +283,72 @@ public: | ||||||
|         return is_bindless; |         return is_bindless; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     constexpr bool IsIndexed() const { | ||||||
|  |         return is_indexed; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr u32 Size() const { | ||||||
|  |         return size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     constexpr void SetSize(u32 new_size) { | ||||||
|  |         size = new_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     u32 index{};  ///< Emulated index given for the this sampler.
 |     u32 index{};  ///< Emulated index given for the this sampler.
 | ||||||
|     u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
 |     u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
 | ||||||
|     u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
 |     u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
 | ||||||
|  |     u32 size{};   ///< Size of the sampler if indexed.
 | ||||||
| 
 | 
 | ||||||
|     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
 |     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
 | ||||||
|     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
 |     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
 | ||||||
|     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
 |     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
 | ||||||
|     bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
 |     bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
 | ||||||
|     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 |     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 | ||||||
|  |     bool is_indexed{};  ///< Whether this sampler is an indexed array of textures.
 | ||||||
|  | }; | ||||||
|  | 
 | ||||||
/// Represents a tracked indexed (array) sampler: a sampler handle read from a const buffer at a
/// constant base offset plus a dynamic index that is stored in a custom variable
class ArraySamplerNode final {
public:
    /// @param index        Index of the const buffer the sampler handle is read from.
    /// @param base_offset  Immediate offset inside the const buffer where the array begins.
    /// @param bindless_var Id of the custom variable that holds the dynamic array index.
    explicit constexpr ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}

    /// Returns the index of the const buffer the sampler handle is read from.
    constexpr u32 GetIndex() const {
        return index;
    }

    /// Returns the immediate offset inside the const buffer where the array begins.
    constexpr u32 GetBaseOffset() const {
        return base_offset;
    }

    /// Returns the id of the custom variable that holds the dynamic array index.
    constexpr u32 GetIndexVar() const {
        return bindless_var;
    }

private:
    u32 index;        ///< Const buffer index.
    u32 base_offset;  ///< Immediate base offset of the sampler array in the const buffer.
    u32 bindless_var; ///< Custom variable id used as the array index.
};
|  | 
 | ||||||
/// Represents a tracked bindless sampler into a direct const buffer
class BindlessSamplerNode final {
public:
    /// @param index  Index of the const buffer the sampler handle is read from.
    /// @param offset Immediate offset inside the const buffer where the handle is located.
    explicit constexpr BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}

    /// Returns the index of the const buffer the sampler handle is read from.
    constexpr u32 GetIndex() const {
        return index;
    }

    /// Returns the immediate offset of the sampler handle inside the const buffer.
    constexpr u32 GetOffset() const {
        return offset;
    }

private:
    u32 index;  ///< Const buffer index.
    u32 offset; ///< Immediate offset of the sampler handle in the const buffer.
};
| 
 | 
 | ||||||
| class Image final { | class Image final { | ||||||
|  | @ -382,6 +445,7 @@ struct MetaTexture { | ||||||
|     Node lod; |     Node lod; | ||||||
|     Node component{}; |     Node component{}; | ||||||
|     u32 element{}; |     u32 element{}; | ||||||
|  |     Node index{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct MetaImage { | struct MetaImage { | ||||||
|  | @ -488,6 +552,19 @@ private: | ||||||
|     Tegra::Shader::Register index{}; |     Tegra::Shader::Register index{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
/// Node that refers to a custom variable through its index
class CustomVarNode final {
public:
    constexpr explicit CustomVarNode(u32 variable_index) : index{variable_index} {}

    /// Returns the index that identifies the custom variable.
    constexpr u32 GetIndex() const {
        return index;
    }

private:
    u32 index{}; ///< Index identifying the custom variable.
};
|  | 
 | ||||||
| /// A 32-bits value that represents an immediate value
 | /// A 32-bits value that represents an immediate value
 | ||||||
| class ImmediateNode final { | class ImmediateNode final { | ||||||
| public: | public: | ||||||
|  |  | ||||||
|  | @ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { | ||||||
|     return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); |     return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <typename T, typename... Args> | ||||||
|  | TrackSampler MakeTrackSampler(Args&&... args) { | ||||||
|  |     static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||||||
|  |     return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| template <typename... Args> | template <typename... Args> | ||||||
| Node Operation(OperationCode code, Args&&... args) { | Node Operation(OperationCode code, Args&&... args) { | ||||||
|     if constexpr (sizeof...(args) == 0) { |     if constexpr (sizeof...(args) == 0) { | ||||||
|  |  | ||||||
|  | @ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet | ||||||
|                    ConstBufferLocker& locker) |                    ConstBufferLocker& locker) | ||||||
|     : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |     : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | ||||||
|     Decode(); |     Decode(); | ||||||
|  |     PostDecode(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ShaderIR::~ShaderIR() = default; | ShaderIR::~ShaderIR() = default; | ||||||
|  | @ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { | ||||||
|     return MakeNode<GprNode>(reg); |     return MakeNode<GprNode>(reg); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Node ShaderIR::GetCustomVariable(u32 id) { | ||||||
|  |     return MakeNode<CustomVarNode>(id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| Node ShaderIR::GetImmediate19(Instruction instr) { | Node ShaderIR::GetImmediate19(Instruction instr) { | ||||||
|     return Immediate(instr.alu.GetImm20_19()); |     return Immediate(instr.alu.GetImm20_19()); | ||||||
| } | } | ||||||
|  | @ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { | ||||||
|     return id; |     return id; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | u32 ShaderIR::NewCustomVariable() { | ||||||
|  |     return num_custom_variables++; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace VideoCommon::Shader
 | } // namespace VideoCommon::Shader
 | ||||||
|  |  | ||||||
|  | @ -180,6 +180,10 @@ public: | ||||||
|         return amend_code[index]; |         return amend_code[index]; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     u32 GetNumCustomVariables() const { | ||||||
|  |         return num_custom_variables; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     friend class ASTDecoder; |     friend class ASTDecoder; | ||||||
| 
 | 
 | ||||||
|  | @ -191,6 +195,7 @@ private: | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     void Decode(); |     void Decode(); | ||||||
|  |     void PostDecode(); | ||||||
| 
 | 
 | ||||||
|     NodeBlock DecodeRange(u32 begin, u32 end); |     NodeBlock DecodeRange(u32 begin, u32 end); | ||||||
|     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); |     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||||||
|  | @ -235,6 +240,8 @@ private: | ||||||
| 
 | 
 | ||||||
|     /// Generates a node for a passed register.
 |     /// Generates a node for a passed register.
 | ||||||
|     Node GetRegister(Tegra::Shader::Register reg); |     Node GetRegister(Tegra::Shader::Register reg); | ||||||
|  |     /// Generates a node for a custom variable
 | ||||||
|  |     Node GetCustomVariable(u32 id); | ||||||
|     /// Generates a node representing a 19-bit immediate value
 |     /// Generates a node representing a 19-bit immediate value
 | ||||||
|     Node GetImmediate19(Tegra::Shader::Instruction instr); |     Node GetImmediate19(Tegra::Shader::Instruction instr); | ||||||
|     /// Generates a node representing a 32-bit immediate value
 |     /// Generates a node representing a 32-bit immediate value
 | ||||||
|  | @ -321,7 +328,7 @@ private: | ||||||
|                               std::optional<SamplerInfo> sampler_info = std::nullopt); |                               std::optional<SamplerInfo> sampler_info = std::nullopt); | ||||||
| 
 | 
 | ||||||
|     /// Accesses a texture sampler for a bindless texture.
 |     /// Accesses a texture sampler for a bindless texture.
 | ||||||
|     const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, |     const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, | ||||||
|                                       std::optional<SamplerInfo> sampler_info = std::nullopt); |                                       std::optional<SamplerInfo> sampler_info = std::nullopt); | ||||||
| 
 | 
 | ||||||
|     /// Accesses an image.
 |     /// Accesses an image.
 | ||||||
|  | @ -387,6 +394,9 @@ private: | ||||||
| 
 | 
 | ||||||
|     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | ||||||
| 
 | 
 | ||||||
|  |     std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||||||
|  |                                                         s64 cursor); | ||||||
|  | 
 | ||||||
|     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | ||||||
| 
 | 
 | ||||||
|     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||||||
|  | @ -399,6 +409,8 @@ private: | ||||||
|     /// Register new amending code and obtain the reference id.
 |     /// Register new amending code and obtain the reference id.
 | ||||||
|     std::size_t DeclareAmend(Node new_amend); |     std::size_t DeclareAmend(Node new_amend); | ||||||
| 
 | 
 | ||||||
|  |     u32 NewCustomVariable(); | ||||||
|  | 
 | ||||||
|     const ProgramCode& program_code; |     const ProgramCode& program_code; | ||||||
|     const u32 main_offset; |     const u32 main_offset; | ||||||
|     const CompilerSettings settings; |     const CompilerSettings settings; | ||||||
|  | @ -414,6 +426,7 @@ private: | ||||||
|     NodeBlock global_code; |     NodeBlock global_code; | ||||||
|     ASTManager program_manager{true, true}; |     ASTManager program_manager{true, true}; | ||||||
|     std::vector<Node> amend_code; |     std::vector<Node> amend_code; | ||||||
|  |     u32 num_custom_variables{}; | ||||||
| 
 | 
 | ||||||
|     std::set<u32> used_registers; |     std::set<u32> used_registers; | ||||||
|     std::set<Tegra::Shader::Pred> used_predicates; |     std::set<Tegra::Shader::Pred> used_predicates; | ||||||
|  | @ -431,6 +444,7 @@ private: | ||||||
|     bool uses_instance_id{}; |     bool uses_instance_id{}; | ||||||
|     bool uses_vertex_id{}; |     bool uses_vertex_id{}; | ||||||
|     bool uses_warps{}; |     bool uses_warps{}; | ||||||
|  |     bool uses_indexed_samplers{}; | ||||||
| 
 | 
 | ||||||
|     Tegra::Shader::Header header; |     Tegra::Shader::Header header; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -8,6 +8,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/shader/node.h" | #include "video_core/shader/node.h" | ||||||
|  | #include "video_core/shader/node_helper.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
|  | @ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||||||
|     } |     } | ||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||||||
|  |     if (operation.GetCode() != OperationCode::UAdd) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     Node gpr{}; | ||||||
|  |     Node offset{}; | ||||||
|  |     ASSERT(operation.GetOperandsCount() == 2); | ||||||
|  |     for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||||||
|  |         Node operand = operation[i]; | ||||||
|  |         if (std::holds_alternative<ImmediateNode>(*operand)) { | ||||||
|  |             offset = operation[i]; | ||||||
|  |         } else if (std::holds_alternative<GprNode>(*operand)) { | ||||||
|  |             gpr = operation[i]; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (offset && gpr) { | ||||||
|  |         return std::make_pair(gpr, offset); | ||||||
|  |     } | ||||||
|  |     return std::nullopt; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||||||
|  |     if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||||||
|  |         operation->SetAmendIndex(amend_index); | ||||||
|  |         return true; | ||||||
|  |     } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||||||
|  |         conditional->SetAmendIndex(amend_index); | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
|  | std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||||||
|  |                                                               s64 cursor) { | ||||||
|  |     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||||||
|  |         // Constant buffer found, test if it's an immediate
 | ||||||
|  |         const auto offset = cbuf->GetOffset(); | ||||||
|  |         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||||||
|  |             auto track = | ||||||
|  |                 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | ||||||
|  |             return {tracked, track}; | ||||||
|  |         } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||||||
|  |             auto bound_buffer = locker.ObtainBoundBuffer(); | ||||||
|  |             if (!bound_buffer) { | ||||||
|  |                 return {}; | ||||||
|  |             } | ||||||
|  |             if (*bound_buffer != cbuf->GetIndex()) { | ||||||
|  |                 return {}; | ||||||
|  |             } | ||||||
|  |             auto pair = DecoupleIndirectRead(*operation); | ||||||
|  |             if (!pair) { | ||||||
|  |                 return {}; | ||||||
|  |             } | ||||||
|  |             auto [gpr, base_offset] = *pair; | ||||||
|  |             const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | ||||||
|  |             auto gpu_driver = locker.AccessGuestDriverProfile(); | ||||||
|  |             if (gpu_driver == nullptr) { | ||||||
|  |                 return {}; | ||||||
|  |             } | ||||||
|  |             const u32 bindless_cv = NewCustomVariable(); | ||||||
|  |             const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | ||||||
|  |                                       Immediate(gpu_driver->GetTextureHandlerSize())); | ||||||
|  | 
 | ||||||
|  |             const Node cv_node = GetCustomVariable(bindless_cv); | ||||||
|  |             Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | ||||||
|  |             const std::size_t amend_index = DeclareAmend(amend_op); | ||||||
|  |             AmendNodeCv(amend_index, code[cursor]); | ||||||
|  |             // TODO Implement Bindless Index custom variable
 | ||||||
|  |             auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), | ||||||
|  |                                                             offset_inm->GetValue(), bindless_cv); | ||||||
|  |             return {tracked, track}; | ||||||
|  |         } | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  |     if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||||||
|  |         if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
 | ||||||
|  |         // register that it uses as operand
 | ||||||
|  |         const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||||||
|  |         if (!source) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         return TrackBindlessSampler(source, code, new_cursor); | ||||||
|  |     } | ||||||
|  |     if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||||||
|  |         for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||||||
|  |             if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); | ||||||
|  |                 std::get<0>(found)) { | ||||||
|  |                 // Cbuf found in operand.
 | ||||||
|  |                 return found; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  |     if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||||||
|  |         const auto& conditional_code = conditional->GetCode(); | ||||||
|  |         return TrackBindlessSampler(tracked, conditional_code, | ||||||
|  |                                     static_cast<s64>(conditional_code.size())); | ||||||
|  |     } | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | ||||||
|                                                s64 cursor) const { |                                                s64 cursor) const { | ||||||
|     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei