forked from eden-emu/eden
		
	shader: Remove old shader management
This commit is contained in:
		
							parent
							
								
									2bb0069e4e
								
							
						
					
					
						commit
						65069df8aa
					
				
					 83 changed files with 57 additions and 19625 deletions
				
			
		|  | @ -51,61 +51,7 @@ endif() | ||||||
| # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) | ||||||
| set(VIDEO_CORE "${SRC_DIR}/src/video_core") | set(VIDEO_CORE "${SRC_DIR}/src/video_core") | ||||||
| set(HASH_FILES | set(HASH_FILES | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" |     # ... | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |  | ||||||
|     "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/bfe.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/bfi.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/conversion.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/ffma.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/float_set.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/half_set.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/hfma2.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/image.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/integer_set.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/memory.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/texture.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/other.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/shift.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/video.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/warp.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/decode/xmad.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/ast.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/ast.h" |  | ||||||
|     "${VIDEO_CORE}/shader/compiler_settings.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/compiler_settings.h" |  | ||||||
|     "${VIDEO_CORE}/shader/control_flow.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/control_flow.h" |  | ||||||
|     "${VIDEO_CORE}/shader/decode.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/expr.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/expr.h" |  | ||||||
|     "${VIDEO_CORE}/shader/node.h" |  | ||||||
|     "${VIDEO_CORE}/shader/node_helper.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/node_helper.h" |  | ||||||
|     "${VIDEO_CORE}/shader/registry.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/registry.h" |  | ||||||
|     "${VIDEO_CORE}/shader/shader_ir.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/shader_ir.h" |  | ||||||
|     "${VIDEO_CORE}/shader/track.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/transform_feedback.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/transform_feedback.h" |  | ||||||
| ) | ) | ||||||
| set(COMBINED "") | set(COMBINED "") | ||||||
| foreach (F IN LISTS HASH_FILES) | foreach (F IN LISTS HASH_FILES) | ||||||
|  |  | ||||||
							
								
								
									
										2
									
								
								externals/Vulkan-Headers
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/Vulkan-Headers
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +1 @@ | ||||||
| Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0 | Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6 | ||||||
							
								
								
									
										2
									
								
								externals/sirit
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/sirit
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +1 @@ | ||||||
| Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a | Subproject commit a39596358a3a5488c06554c0c15184a6af71e433 | ||||||
|  | @ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp | ||||||
|     DEPENDS |     DEPENDS | ||||||
|       # WARNING! It was too much work to try and make a common location for this list, |       # WARNING! It was too much work to try and make a common location for this list, | ||||||
|       # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well |       # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" |       # ... | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |  | ||||||
|       "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/bfe.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/bfi.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/conversion.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/ffma.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/float_set.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/half_set.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/hfma2.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/image.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/integer_set.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/memory.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/texture.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/other.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/shift.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/video.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/warp.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/decode/xmad.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/ast.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/ast.h" |  | ||||||
|       "${VIDEO_CORE}/shader/compiler_settings.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/compiler_settings.h" |  | ||||||
|       "${VIDEO_CORE}/shader/control_flow.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/control_flow.h" |  | ||||||
|       "${VIDEO_CORE}/shader/decode.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/expr.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/expr.h" |  | ||||||
|       "${VIDEO_CORE}/shader/node.h" |  | ||||||
|       "${VIDEO_CORE}/shader/node_helper.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/node_helper.h" |  | ||||||
|       "${VIDEO_CORE}/shader/registry.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/registry.h" |  | ||||||
|       "${VIDEO_CORE}/shader/shader_ir.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/shader_ir.h" |  | ||||||
|       "${VIDEO_CORE}/shader/track.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/transform_feedback.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/transform_feedback.h" |  | ||||||
|       # and also check that the scm_rev files haven't changed |       # and also check that the scm_rev files haven't changed | ||||||
|       "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" |       "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" | ||||||
|       "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" |       "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" | ||||||
|  |  | ||||||
|  | @ -29,7 +29,6 @@ add_library(video_core STATIC | ||||||
|     dirty_flags.h |     dirty_flags.h | ||||||
|     dma_pusher.cpp |     dma_pusher.cpp | ||||||
|     dma_pusher.h |     dma_pusher.h | ||||||
|     engines/const_buffer_engine_interface.h |  | ||||||
|     engines/const_buffer_info.h |     engines/const_buffer_info.h | ||||||
|     engines/engine_interface.h |     engines/engine_interface.h | ||||||
|     engines/engine_upload.cpp |     engines/engine_upload.cpp | ||||||
|  | @ -61,8 +60,6 @@ add_library(video_core STATIC | ||||||
|     gpu.h |     gpu.h | ||||||
|     gpu_thread.cpp |     gpu_thread.cpp | ||||||
|     gpu_thread.h |     gpu_thread.h | ||||||
|     guest_driver.cpp |  | ||||||
|     guest_driver.h |  | ||||||
|     memory_manager.cpp |     memory_manager.cpp | ||||||
|     memory_manager.h |     memory_manager.h | ||||||
|     query_cache.h |     query_cache.h | ||||||
|  | @ -71,8 +68,6 @@ add_library(video_core STATIC | ||||||
|     rasterizer_interface.h |     rasterizer_interface.h | ||||||
|     renderer_base.cpp |     renderer_base.cpp | ||||||
|     renderer_base.h |     renderer_base.h | ||||||
|     renderer_opengl/gl_arb_decompiler.cpp |  | ||||||
|     renderer_opengl/gl_arb_decompiler.h |  | ||||||
|     renderer_opengl/gl_buffer_cache.cpp |     renderer_opengl/gl_buffer_cache.cpp | ||||||
|     renderer_opengl/gl_buffer_cache.h |     renderer_opengl/gl_buffer_cache.h | ||||||
|     renderer_opengl/gl_device.cpp |     renderer_opengl/gl_device.cpp | ||||||
|  | @ -85,10 +80,6 @@ add_library(video_core STATIC | ||||||
|     renderer_opengl/gl_resource_manager.h |     renderer_opengl/gl_resource_manager.h | ||||||
|     renderer_opengl/gl_shader_cache.cpp |     renderer_opengl/gl_shader_cache.cpp | ||||||
|     renderer_opengl/gl_shader_cache.h |     renderer_opengl/gl_shader_cache.h | ||||||
|     renderer_opengl/gl_shader_decompiler.cpp |  | ||||||
|     renderer_opengl/gl_shader_decompiler.h |  | ||||||
|     renderer_opengl/gl_shader_disk_cache.cpp |  | ||||||
|     renderer_opengl/gl_shader_disk_cache.h |  | ||||||
|     renderer_opengl/gl_shader_manager.cpp |     renderer_opengl/gl_shader_manager.cpp | ||||||
|     renderer_opengl/gl_shader_manager.h |     renderer_opengl/gl_shader_manager.h | ||||||
|     renderer_opengl/gl_shader_util.cpp |     renderer_opengl/gl_shader_util.cpp | ||||||
|  | @ -128,8 +119,6 @@ add_library(video_core STATIC | ||||||
|     renderer_vulkan/vk_descriptor_pool.h |     renderer_vulkan/vk_descriptor_pool.h | ||||||
|     renderer_vulkan/vk_fence_manager.cpp |     renderer_vulkan/vk_fence_manager.cpp | ||||||
|     renderer_vulkan/vk_fence_manager.h |     renderer_vulkan/vk_fence_manager.h | ||||||
|     renderer_vulkan/vk_graphics_pipeline.cpp |  | ||||||
|     renderer_vulkan/vk_graphics_pipeline.h |  | ||||||
|     renderer_vulkan/vk_master_semaphore.cpp |     renderer_vulkan/vk_master_semaphore.cpp | ||||||
|     renderer_vulkan/vk_master_semaphore.h |     renderer_vulkan/vk_master_semaphore.h | ||||||
|     renderer_vulkan/vk_pipeline_cache.cpp |     renderer_vulkan/vk_pipeline_cache.cpp | ||||||
|  | @ -142,8 +131,6 @@ add_library(video_core STATIC | ||||||
|     renderer_vulkan/vk_resource_pool.h |     renderer_vulkan/vk_resource_pool.h | ||||||
|     renderer_vulkan/vk_scheduler.cpp |     renderer_vulkan/vk_scheduler.cpp | ||||||
|     renderer_vulkan/vk_scheduler.h |     renderer_vulkan/vk_scheduler.h | ||||||
|     renderer_vulkan/vk_shader_decompiler.cpp |  | ||||||
|     renderer_vulkan/vk_shader_decompiler.h |  | ||||||
|     renderer_vulkan/vk_shader_util.cpp |     renderer_vulkan/vk_shader_util.cpp | ||||||
|     renderer_vulkan/vk_shader_util.h |     renderer_vulkan/vk_shader_util.h | ||||||
|     renderer_vulkan/vk_staging_buffer_pool.cpp |     renderer_vulkan/vk_staging_buffer_pool.cpp | ||||||
|  | @ -159,57 +146,6 @@ add_library(video_core STATIC | ||||||
|     shader_cache.h |     shader_cache.h | ||||||
|     shader_notify.cpp |     shader_notify.cpp | ||||||
|     shader_notify.h |     shader_notify.h | ||||||
|     shader/decode/arithmetic.cpp |  | ||||||
|     shader/decode/arithmetic_immediate.cpp |  | ||||||
|     shader/decode/bfe.cpp |  | ||||||
|     shader/decode/bfi.cpp |  | ||||||
|     shader/decode/shift.cpp |  | ||||||
|     shader/decode/arithmetic_integer.cpp |  | ||||||
|     shader/decode/arithmetic_integer_immediate.cpp |  | ||||||
|     shader/decode/arithmetic_half.cpp |  | ||||||
|     shader/decode/arithmetic_half_immediate.cpp |  | ||||||
|     shader/decode/ffma.cpp |  | ||||||
|     shader/decode/hfma2.cpp |  | ||||||
|     shader/decode/conversion.cpp |  | ||||||
|     shader/decode/memory.cpp |  | ||||||
|     shader/decode/texture.cpp |  | ||||||
|     shader/decode/image.cpp |  | ||||||
|     shader/decode/float_set_predicate.cpp |  | ||||||
|     shader/decode/integer_set_predicate.cpp |  | ||||||
|     shader/decode/half_set_predicate.cpp |  | ||||||
|     shader/decode/predicate_set_register.cpp |  | ||||||
|     shader/decode/predicate_set_predicate.cpp |  | ||||||
|     shader/decode/register_set_predicate.cpp |  | ||||||
|     shader/decode/float_set.cpp |  | ||||||
|     shader/decode/integer_set.cpp |  | ||||||
|     shader/decode/half_set.cpp |  | ||||||
|     shader/decode/video.cpp |  | ||||||
|     shader/decode/warp.cpp |  | ||||||
|     shader/decode/xmad.cpp |  | ||||||
|     shader/decode/other.cpp |  | ||||||
|     shader/ast.cpp |  | ||||||
|     shader/ast.h |  | ||||||
|     shader/async_shaders.cpp |  | ||||||
|     shader/async_shaders.h |  | ||||||
|     shader/compiler_settings.cpp |  | ||||||
|     shader/compiler_settings.h |  | ||||||
|     shader/control_flow.cpp |  | ||||||
|     shader/control_flow.h |  | ||||||
|     shader/decode.cpp |  | ||||||
|     shader/expr.cpp |  | ||||||
|     shader/expr.h |  | ||||||
|     shader/memory_util.cpp |  | ||||||
|     shader/memory_util.h |  | ||||||
|     shader/node_helper.cpp |  | ||||||
|     shader/node_helper.h |  | ||||||
|     shader/node.h |  | ||||||
|     shader/registry.cpp |  | ||||||
|     shader/registry.h |  | ||||||
|     shader/shader_ir.cpp |  | ||||||
|     shader/shader_ir.h |  | ||||||
|     shader/track.cpp |  | ||||||
|     shader/transform_feedback.cpp |  | ||||||
|     shader/transform_feedback.h |  | ||||||
|     surface.cpp |     surface.cpp | ||||||
|     surface.h |     surface.h | ||||||
|     texture_cache/accelerated_swizzle.cpp |     texture_cache/accelerated_swizzle.cpp | ||||||
|  |  | ||||||
|  | @ -1,103 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <type_traits> |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/guest_driver.h" |  | ||||||
| #include "video_core/textures/texture.h" |  | ||||||
| 
 |  | ||||||
| namespace Tegra::Engines { |  | ||||||
| 
 |  | ||||||
| struct SamplerDescriptor { |  | ||||||
|     union { |  | ||||||
|         u32 raw = 0; |  | ||||||
|         BitField<0, 2, Tegra::Shader::TextureType> texture_type; |  | ||||||
|         BitField<2, 3, Tegra::Texture::ComponentType> r_type; |  | ||||||
|         BitField<5, 1, u32> is_array; |  | ||||||
|         BitField<6, 1, u32> is_buffer; |  | ||||||
|         BitField<7, 1, u32> is_shadow; |  | ||||||
|         BitField<8, 3, Tegra::Texture::ComponentType> g_type; |  | ||||||
|         BitField<11, 3, Tegra::Texture::ComponentType> b_type; |  | ||||||
|         BitField<14, 3, Tegra::Texture::ComponentType> a_type; |  | ||||||
|         BitField<17, 7, Tegra::Texture::TextureFormat> format; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     bool operator==(const SamplerDescriptor& rhs) const noexcept { |  | ||||||
|         return raw == rhs.raw; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const SamplerDescriptor& rhs) const noexcept { |  | ||||||
|         return !operator==(rhs); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { |  | ||||||
|         using Tegra::Shader::TextureType; |  | ||||||
|         SamplerDescriptor result; |  | ||||||
| 
 |  | ||||||
|         result.format.Assign(tic.format.Value()); |  | ||||||
|         result.r_type.Assign(tic.r_type.Value()); |  | ||||||
|         result.g_type.Assign(tic.g_type.Value()); |  | ||||||
|         result.b_type.Assign(tic.b_type.Value()); |  | ||||||
|         result.a_type.Assign(tic.a_type.Value()); |  | ||||||
| 
 |  | ||||||
|         switch (tic.texture_type.Value()) { |  | ||||||
|         case Tegra::Texture::TextureType::Texture1D: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture1D); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture2D: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture2D); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture3D: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture3D); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::TextureCubemap: |  | ||||||
|             result.texture_type.Assign(TextureType::TextureCube); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture1DArray: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture1D); |  | ||||||
|             result.is_array.Assign(1); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture2DArray: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture2D); |  | ||||||
|             result.is_array.Assign(1); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture1DBuffer: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture1D); |  | ||||||
|             result.is_buffer.Assign(1); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::Texture2DNoMipmap: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture2D); |  | ||||||
|             return result; |  | ||||||
|         case Tegra::Texture::TextureType::TextureCubeArray: |  | ||||||
|             result.texture_type.Assign(TextureType::TextureCube); |  | ||||||
|             result.is_array.Assign(1); |  | ||||||
|             return result; |  | ||||||
|         default: |  | ||||||
|             result.texture_type.Assign(TextureType::Texture2D); |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); |  | ||||||
| 
 |  | ||||||
| class ConstBufferEngineInterface { |  | ||||||
| public: |  | ||||||
|     virtual ~ConstBufferEngineInterface() = default; |  | ||||||
|     virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; |  | ||||||
|     virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; |  | ||||||
|     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |  | ||||||
|                                                     u64 offset) const = 0; |  | ||||||
|     virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; |  | ||||||
|     virtual u32 GetBoundBuffer() const = 0; |  | ||||||
| 
 |  | ||||||
|     virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; |  | ||||||
|     virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace Tegra::Engines
 |  | ||||||
|  | @ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |  | ||||||
|     ASSERT(stage == ShaderType::Compute); |  | ||||||
|     const auto& buffer = launch_description.const_buffer_config[const_buffer]; |  | ||||||
|     u32 result; |  | ||||||
|     std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { |  | ||||||
|     return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, |  | ||||||
|                                                        u64 offset) const { |  | ||||||
|     ASSERT(stage == ShaderType::Compute); |  | ||||||
|     const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; |  | ||||||
|     const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; |  | ||||||
|     return AccessSampler(memory_manager.Read<u32>(tex_info_address)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { |  | ||||||
|     const Texture::TextureHandle tex_handle{handle}; |  | ||||||
|     const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |  | ||||||
|     const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); |  | ||||||
|     result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { |  | ||||||
|     return rasterizer->AccessGuestDriverProfile(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { |  | ||||||
|     return rasterizer->AccessGuestDriverProfile(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void KeplerCompute::ProcessLaunch() { | void KeplerCompute::ProcessLaunch() { | ||||||
|     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||||||
|     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||||||
|                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | ||||||
| 
 |     rasterizer->DispatchCompute(); | ||||||
|     const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |  | ||||||
|     LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |  | ||||||
| 
 |  | ||||||
|     rasterizer->DispatchCompute(code_addr); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | ||||||
|  |  | ||||||
|  | @ -10,7 +10,6 @@ | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" |  | ||||||
| #include "video_core/engines/engine_interface.h" | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
|  | @ -40,7 +39,7 @@ namespace Tegra::Engines { | ||||||
| #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | ||||||
|     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { | class KeplerCompute final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); |     explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); | ||||||
|     ~KeplerCompute(); |     ~KeplerCompute(); | ||||||
|  | @ -209,23 +208,6 @@ public: | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                          u32 methods_pending) override; |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |  | ||||||
|                                             u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessSampler(u32 handle) const override; |  | ||||||
| 
 |  | ||||||
|     u32 GetBoundBuffer() const override { |  | ||||||
|         return regs.tex_cb_index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; |  | ||||||
| 
 |  | ||||||
|     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     void ProcessLaunch(); |     void ProcessLaunch(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { | ||||||
|     rasterizer->Clear(); |     rasterizer->Clear(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |  | ||||||
|     ASSERT(stage != ShaderType::Compute); |  | ||||||
|     const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; |  | ||||||
|     const auto& buffer = shader_stage.const_buffers[const_buffer]; |  | ||||||
|     return memory_manager.Read<u32>(buffer.address + offset); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { |  | ||||||
|     return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, |  | ||||||
|                                                    u64 offset) const { |  | ||||||
|     ASSERT(stage != ShaderType::Compute); |  | ||||||
|     const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; |  | ||||||
|     const auto& tex_info_buffer = shader.const_buffers[const_buffer]; |  | ||||||
|     const GPUVAddr tex_info_address = tex_info_buffer.address + offset; |  | ||||||
|     return AccessSampler(memory_manager.Read<u32>(tex_info_address)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { |  | ||||||
|     const Texture::TextureHandle tex_handle{handle}; |  | ||||||
|     const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); |  | ||||||
|     const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); |  | ||||||
|     result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { |  | ||||||
|     return rasterizer->AccessGuestDriverProfile(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { |  | ||||||
|     return rasterizer->AccessGuestDriverProfile(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Tegra::Engines
 | } // namespace Tegra::Engines
 | ||||||
|  |  | ||||||
|  | @ -17,7 +17,6 @@ | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/math_util.h" | #include "common/math_util.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" |  | ||||||
| #include "video_core/engines/const_buffer_info.h" | #include "video_core/engines/const_buffer_info.h" | ||||||
| #include "video_core/engines/engine_interface.h" | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
|  | @ -49,7 +48,7 @@ namespace Tegra::Engines { | ||||||
| #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | ||||||
|     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { | class Maxwell3D final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); |     explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); | ||||||
|     ~Maxwell3D(); |     ~Maxwell3D(); | ||||||
|  | @ -1424,23 +1423,6 @@ public: | ||||||
| 
 | 
 | ||||||
|     void FlushMMEInlineDraw(); |     void FlushMMEInlineDraw(); | ||||||
| 
 | 
 | ||||||
|     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |  | ||||||
|                                             u64 offset) const override; |  | ||||||
| 
 |  | ||||||
|     SamplerDescriptor AccessSampler(u32 handle) const override; |  | ||||||
| 
 |  | ||||||
|     u32 GetBoundBuffer() const override { |  | ||||||
|         return regs.tex_cb_index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; |  | ||||||
| 
 |  | ||||||
|     const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; |  | ||||||
| 
 |  | ||||||
|     bool ShouldExecute() const { |     bool ShouldExecute() const { | ||||||
|         return execute_on; |         return execute_on; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -1,37 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <limits> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/guest_driver.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCore { |  | ||||||
| 
 |  | ||||||
| void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { |  | ||||||
|     if (texture_handler_size) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const std::size_t size = bound_offsets.size(); |  | ||||||
|     if (size < 2) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); |  | ||||||
|     u32 min_val = std::numeric_limits<u32>::max(); |  | ||||||
|     for (std::size_t i = 1; i < size; ++i) { |  | ||||||
|         if (bound_offsets[i] == bound_offsets[i - 1]) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; |  | ||||||
|         min_val = std::min(min_val, new_min); |  | ||||||
|     } |  | ||||||
|     if (min_val > 2) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     texture_handler_size = min_texture_handler_size * min_val; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCore
 |  | ||||||
|  | @ -1,46 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <optional> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCore { |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect |  | ||||||
|  * information necessary for impossible to avoid HLE methods like shader tracks as they are |  | ||||||
|  * Entscheidungsproblems. |  | ||||||
|  */ |  | ||||||
| class GuestDriverProfile { |  | ||||||
| public: |  | ||||||
|     explicit GuestDriverProfile() = default; |  | ||||||
|     explicit GuestDriverProfile(std::optional<u32> texture_handler_size_) |  | ||||||
|         : texture_handler_size{texture_handler_size_} {} |  | ||||||
| 
 |  | ||||||
|     void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); |  | ||||||
| 
 |  | ||||||
|     u32 GetTextureHandlerSize() const { |  | ||||||
|         return texture_handler_size.value_or(default_texture_handler_size); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsTextureHandlerSizeKnown() const { |  | ||||||
|         return texture_handler_size.has_value(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     // Minimum size of texture handler any driver can use.
 |  | ||||||
|     static constexpr u32 min_texture_handler_size = 4; |  | ||||||
| 
 |  | ||||||
|     // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
 |  | ||||||
|     // Thus, certain drivers may squish the size.
 |  | ||||||
|     static constexpr u32 default_texture_handler_size = 8; |  | ||||||
| 
 |  | ||||||
|     std::optional<u32> texture_handler_size = default_texture_handler_size; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCore
 |  | ||||||
|  | @ -11,7 +11,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/guest_driver.h" |  | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
|  | @ -45,7 +44,7 @@ public: | ||||||
|     virtual void Clear() = 0; |     virtual void Clear() = 0; | ||||||
| 
 | 
 | ||||||
|     /// Dispatches a compute shader invocation
 |     /// Dispatches a compute shader invocation
 | ||||||
|     virtual void DispatchCompute(GPUVAddr code_addr) = 0; |     virtual void DispatchCompute() = 0; | ||||||
| 
 | 
 | ||||||
|     /// Resets the counter of a query
 |     /// Resets the counter of a query
 | ||||||
|     virtual void ResetCounter(QueryType type) = 0; |     virtual void ResetCounter(QueryType type) = 0; | ||||||
|  | @ -136,18 +135,5 @@ public: | ||||||
|     /// Initialize disk cached resources for the game being emulated
 |     /// Initialize disk cached resources for the game being emulated
 | ||||||
|     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                                    const DiskResourceLoadCallback& callback) {} |                                    const DiskResourceLoadCallback& callback) {} | ||||||
| 
 |  | ||||||
|     /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
 |  | ||||||
|     [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { |  | ||||||
|         return guest_driver_profile; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
 |  | ||||||
|     [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { |  | ||||||
|         return guest_driver_profile; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     GuestDriverProfile guest_driver_profile{}; |  | ||||||
| }; | }; | ||||||
| } // namespace VideoCore
 | } // namespace VideoCore
 | ||||||
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -1,29 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <string> |  | ||||||
| #include <string_view> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| 
 |  | ||||||
| namespace Tegra::Engines { |  | ||||||
| enum class ShaderType : u32; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| class ShaderIR; |  | ||||||
| class Registry; |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { |  | ||||||
| 
 |  | ||||||
| class Device; |  | ||||||
| 
 |  | ||||||
| std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |  | ||||||
|                                     const VideoCommon::Shader::Registry& registry, |  | ||||||
|                                     Tegra::Engines::ShaderType stage, std::string_view identifier); |  | ||||||
| 
 |  | ||||||
| } // namespace OpenGL
 |  | ||||||
|  | @ -54,40 +54,6 @@ namespace { | ||||||
| 
 | 
 | ||||||
| constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | ||||||
| 
 | 
 | ||||||
| struct TextureHandle { |  | ||||||
|     constexpr TextureHandle(u32 data, bool via_header_index) { |  | ||||||
|         const Tegra::Texture::TextureHandle handle{data}; |  | ||||||
|         image = handle.tic_id; |  | ||||||
|         sampler = via_header_index ? image : handle.tsc_id.Value(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 image; |  | ||||||
|     u32 sampler; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <typename Engine, typename Entry> |  | ||||||
| TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |  | ||||||
|                              ShaderType shader_type, size_t index = 0) { |  | ||||||
|     if constexpr (std::is_same_v<Entry, SamplerEntry>) { |  | ||||||
|         if (entry.is_separated) { |  | ||||||
|             const u32 buffer_1 = entry.buffer; |  | ||||||
|             const u32 buffer_2 = entry.secondary_buffer; |  | ||||||
|             const u32 offset_1 = entry.offset; |  | ||||||
|             const u32 offset_2 = entry.secondary_offset; |  | ||||||
|             const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |  | ||||||
|             const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |  | ||||||
|             return TextureHandle(handle_1 | handle_2, via_header_index); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (entry.is_bindless) { |  | ||||||
|         const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |  | ||||||
|         return TextureHandle(raw, via_header_index); |  | ||||||
|     } |  | ||||||
|     const u32 buffer = engine.GetBoundBuffer(); |  | ||||||
|     const u64 offset = (entry.offset + index) * sizeof(u32); |  | ||||||
|     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /// Translates hardware transform feedback indices
 | /// Translates hardware transform feedback indices
 | ||||||
| /// @param location Hardware location
 | /// @param location Hardware location
 | ||||||
| /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
 | ||||||
|  | @ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | ||||||
| void oglEnable(GLenum cap, bool state) { | void oglEnable(GLenum cap, bool state) { | ||||||
|     (state ? glEnable : glDisable)(cap); |     (state ? glEnable : glDisable)(cap); | ||||||
| } | } | ||||||
| 
 |  | ||||||
| ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |  | ||||||
|     if (entry.is_buffer) { |  | ||||||
|         return ImageViewType::Buffer; |  | ||||||
|     } |  | ||||||
|     switch (entry.type) { |  | ||||||
|     case Tegra::Shader::TextureType::Texture1D: |  | ||||||
|         return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; |  | ||||||
|     case Tegra::Shader::TextureType::Texture2D: |  | ||||||
|         return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; |  | ||||||
|     case Tegra::Shader::TextureType::Texture3D: |  | ||||||
|         return ImageViewType::e3D; |  | ||||||
|     case Tegra::Shader::TextureType::TextureCube: |  | ||||||
|         return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return ImageViewType::e2D; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { |  | ||||||
|     switch (entry.type) { |  | ||||||
|     case Tegra::Shader::ImageType::Texture1D: |  | ||||||
|         return ImageViewType::e1D; |  | ||||||
|     case Tegra::Shader::ImageType::Texture1DArray: |  | ||||||
|         return ImageViewType::e1DArray; |  | ||||||
|     case Tegra::Shader::ImageType::Texture2D: |  | ||||||
|         return ImageViewType::e2D; |  | ||||||
|     case Tegra::Shader::ImageType::Texture2DArray: |  | ||||||
|         return ImageViewType::e2DArray; |  | ||||||
|     case Tegra::Shader::ImageType::Texture3D: |  | ||||||
|         return ImageViewType::e3D; |  | ||||||
|     case Tegra::Shader::ImageType::TextureBuffer: |  | ||||||
|         return ImageViewType::Buffer; |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return ImageViewType::e2D; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|  | @ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||||
|       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | ||||||
|       query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |       query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} | ||||||
|       async_shaders(emu_window_) { |  | ||||||
|     if (device.UseAsynchronousShaders()) { |  | ||||||
|         async_shaders.AllocateWorkers(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| RasterizerOpenGL::~RasterizerOpenGL() = default; | RasterizerOpenGL::~RasterizerOpenGL() = default; | ||||||
| 
 | 
 | ||||||
|  | @ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::SetupShaders(bool is_indexed) { |  | ||||||
|     u32 clip_distances = 0; |  | ||||||
| 
 |  | ||||||
|     std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; |  | ||||||
|     image_view_indices.clear(); |  | ||||||
|     sampler_handles.clear(); |  | ||||||
| 
 |  | ||||||
|     texture_cache.SynchronizeGraphicsDescriptors(); |  | ||||||
| 
 |  | ||||||
|     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |  | ||||||
|         const auto& shader_config = maxwell3d.regs.shader_config[index]; |  | ||||||
|         const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |  | ||||||
| 
 |  | ||||||
|         // Skip stages that are not enabled
 |  | ||||||
|         if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { |  | ||||||
|             switch (program) { |  | ||||||
|             case Maxwell::ShaderProgram::Geometry: |  | ||||||
|                 program_manager.UseGeometryShader(0); |  | ||||||
|                 break; |  | ||||||
|             case Maxwell::ShaderProgram::Fragment: |  | ||||||
|                 program_manager.UseFragmentShader(0); |  | ||||||
|                 break; |  | ||||||
|             default: |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         // Currently this stages are not supported in the OpenGL backend.
 |  | ||||||
|         // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
 |  | ||||||
|         if (program == Maxwell::ShaderProgram::TesselationControl || |  | ||||||
|             program == Maxwell::ShaderProgram::TesselationEval) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); |  | ||||||
|         const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; |  | ||||||
|         switch (program) { |  | ||||||
|         case Maxwell::ShaderProgram::VertexA: |  | ||||||
|         case Maxwell::ShaderProgram::VertexB: |  | ||||||
|             program_manager.UseVertexShader(program_handle); |  | ||||||
|             break; |  | ||||||
|         case Maxwell::ShaderProgram::Geometry: |  | ||||||
|             program_manager.UseGeometryShader(program_handle); |  | ||||||
|             break; |  | ||||||
|         case Maxwell::ShaderProgram::Fragment: |  | ||||||
|             program_manager.UseFragmentShader(program_handle); |  | ||||||
|             break; |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |  | ||||||
|                               shader_config.enable.Value(), shader_config.offset); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Stage indices are 0 - 5
 |  | ||||||
|         const size_t stage = index == 0 ? 0 : index - 1; |  | ||||||
|         shaders[stage] = shader; |  | ||||||
| 
 |  | ||||||
|         SetupDrawTextures(shader, stage); |  | ||||||
|         SetupDrawImages(shader, stage); |  | ||||||
| 
 |  | ||||||
|         buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); |  | ||||||
| 
 |  | ||||||
|         buffer_cache.UnbindGraphicsStorageBuffers(stage); |  | ||||||
|         u32 ssbo_index = 0; |  | ||||||
|         for (const auto& buffer : shader->GetEntries().global_memory_entries) { |  | ||||||
|             buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, |  | ||||||
|                                                    buffer.cbuf_offset, buffer.is_written); |  | ||||||
|             ++ssbo_index; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Workaround for Intel drivers.
 |  | ||||||
|         // When a clip distance is enabled but not set in the shader it crops parts of the screen
 |  | ||||||
|         // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
 |  | ||||||
|         // clip distances only when it's written by a shader stage.
 |  | ||||||
|         clip_distances |= shader->GetEntries().clip_distances; |  | ||||||
| 
 |  | ||||||
|         // When VertexA is enabled, we have dual vertex shaders
 |  | ||||||
|         if (program == Maxwell::ShaderProgram::VertexA) { |  | ||||||
|             // VertexB was combined with VertexA, so we skip the VertexB iteration
 |  | ||||||
|             ++index; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     SyncClipEnabled(clip_distances); |  | ||||||
|     maxwell3d.dirty.flags[Dirty::Shaders] = false; |  | ||||||
| 
 |  | ||||||
|     buffer_cache.UpdateGraphicsBuffers(is_indexed); |  | ||||||
| 
 |  | ||||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |  | ||||||
|     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |  | ||||||
| 
 |  | ||||||
|     buffer_cache.BindHostGeometryBuffers(is_indexed); |  | ||||||
| 
 |  | ||||||
|     size_t image_view_index = 0; |  | ||||||
|     size_t texture_index = 0; |  | ||||||
|     size_t image_index = 0; |  | ||||||
|     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |  | ||||||
|         const Shader* const shader = shaders[stage]; |  | ||||||
|         if (!shader) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         buffer_cache.BindHostStageBuffers(stage); |  | ||||||
|         const auto& base = device.GetBaseBindings(stage); |  | ||||||
|         BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, |  | ||||||
|                      texture_index, image_index); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                                          const VideoCore::DiskResourceLoadCallback& callback) { |                                          const VideoCore::DiskResourceLoadCallback& callback) {} | ||||||
|     shader_cache.LoadDiskCache(title_id, stop_loading, callback); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::Clear() { | void RasterizerOpenGL::Clear() { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_Clears); |     MICROPROFILE_SCOPE(OpenGL_Clears); | ||||||
|  | @ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | ||||||
| 
 | 
 | ||||||
|     // Setup shaders and their used resources.
 |     // Setup shaders and their used resources.
 | ||||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||||
|     SetupShaders(is_indexed); |  | ||||||
| 
 | 
 | ||||||
|     texture_cache.UpdateRenderTargets(false); |     texture_cache.UpdateRenderTargets(false); | ||||||
|     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||||||
|  | @ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | ||||||
|     gpu.TickWork(); |     gpu.TickWork(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | void RasterizerOpenGL::DispatchCompute() { | ||||||
|     Shader* const kernel = shader_cache.GetComputeKernel(code_addr); |     UNREACHABLE_MSG("Not implemented"); | ||||||
| 
 |  | ||||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |  | ||||||
|     BindComputeTextures(kernel); |  | ||||||
| 
 |  | ||||||
|     const auto& entries = kernel->GetEntries(); |  | ||||||
|     buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); |  | ||||||
|     buffer_cache.UnbindComputeStorageBuffers(); |  | ||||||
|     u32 ssbo_index = 0; |  | ||||||
|     for (const auto& buffer : entries.global_memory_entries) { |  | ||||||
|         buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, |  | ||||||
|                                               buffer.is_written); |  | ||||||
|         ++ssbo_index; |  | ||||||
|     } |  | ||||||
|     buffer_cache.UpdateComputeBuffers(); |  | ||||||
|     buffer_cache.BindHostComputeBuffers(); |  | ||||||
| 
 |  | ||||||
|     const auto& launch_desc = kepler_compute.launch_description; |  | ||||||
|     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |  | ||||||
|     ++num_queued_commands; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | ||||||
|  | @ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { |  | ||||||
|     image_view_indices.clear(); |  | ||||||
|     sampler_handles.clear(); |  | ||||||
| 
 |  | ||||||
|     texture_cache.SynchronizeComputeDescriptors(); |  | ||||||
| 
 |  | ||||||
|     SetupComputeTextures(kernel); |  | ||||||
|     SetupComputeImages(kernel); |  | ||||||
| 
 |  | ||||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |  | ||||||
|     texture_cache.FillComputeImageViews(indices_span, image_view_ids); |  | ||||||
| 
 |  | ||||||
|     program_manager.BindCompute(kernel->GetHandle()); |  | ||||||
|     size_t image_view_index = 0; |  | ||||||
|     size_t texture_index = 0; |  | ||||||
|     size_t image_index = 0; |  | ||||||
|     BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, |  | ||||||
|                                     GLuint base_image, size_t& image_view_index, |  | ||||||
|                                     size_t& texture_index, size_t& image_index) { |  | ||||||
|     const GLuint* const samplers = sampler_handles.data() + texture_index; |  | ||||||
|     const GLuint* const textures = texture_handles.data() + texture_index; |  | ||||||
|     const GLuint* const images = image_handles.data() + image_index; |  | ||||||
| 
 |  | ||||||
|     const size_t num_samplers = entries.samplers.size(); |  | ||||||
|     for (const auto& sampler : entries.samplers) { |  | ||||||
|         for (size_t i = 0; i < sampler.size; ++i) { |  | ||||||
|             const ImageViewId image_view_id = image_view_ids[image_view_index++]; |  | ||||||
|             const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|             const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); |  | ||||||
|             texture_handles[texture_index++] = handle; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     const size_t num_images = entries.images.size(); |  | ||||||
|     for (size_t unit = 0; unit < num_images; ++unit) { |  | ||||||
|         // TODO: Mark as modified
 |  | ||||||
|         const ImageViewId image_view_id = image_view_ids[image_view_index++]; |  | ||||||
|         const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|         const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); |  | ||||||
|         image_handles[image_index] = handle; |  | ||||||
|         ++image_index; |  | ||||||
|     } |  | ||||||
|     if (num_samplers > 0) { |  | ||||||
|         glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers); |  | ||||||
|         glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures); |  | ||||||
|     } |  | ||||||
|     if (num_images > 0) { |  | ||||||
|         glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { |  | ||||||
|     const bool via_header_index = |  | ||||||
|         maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : shader->GetEntries().samplers) { |  | ||||||
|         const auto shader_type = static_cast<ShaderType>(stage_index); |  | ||||||
|         for (size_t index = 0; index < entry.size; ++index) { |  | ||||||
|             const auto handle = |  | ||||||
|                 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); |  | ||||||
|             const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); |  | ||||||
|             sampler_handles.push_back(sampler->Handle()); |  | ||||||
|             image_view_indices.push_back(handle.image); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : kernel->GetEntries().samplers) { |  | ||||||
|         for (size_t i = 0; i < entry.size; ++i) { |  | ||||||
|             const auto handle = |  | ||||||
|                 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); |  | ||||||
|             const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); |  | ||||||
|             sampler_handles.push_back(sampler->Handle()); |  | ||||||
|             image_view_indices.push_back(handle.image); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { |  | ||||||
|     const bool via_header_index = |  | ||||||
|         maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : shader->GetEntries().images) { |  | ||||||
|         const auto shader_type = static_cast<ShaderType>(stage_index); |  | ||||||
|         const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : shader->GetEntries().images) { |  | ||||||
|         const auto handle = |  | ||||||
|             GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerOpenGL::SyncState() { | void RasterizerOpenGL::SyncState() { | ||||||
|     SyncViewport(); |     SyncViewport(); | ||||||
|     SyncRasterizeEnable(); |     SyncRasterizeEnable(); | ||||||
|  |  | ||||||
|  | @ -28,11 +28,9 @@ | ||||||
| #include "video_core/renderer_opengl/gl_query_cache.h" | #include "video_core/renderer_opengl/gl_query_cache.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_cache.h" | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||||
| #include "video_core/shader/async_shaders.h" |  | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| 
 | 
 | ||||||
| namespace Core::Memory { | namespace Core::Memory { | ||||||
|  | @ -81,7 +79,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     void Draw(bool is_indexed, bool is_instanced) override; |     void Draw(bool is_indexed, bool is_instanced) override; | ||||||
|     void Clear() override; |     void Clear() override; | ||||||
|     void DispatchCompute(GPUVAddr code_addr) override; |     void DispatchCompute() override; | ||||||
|     void ResetCounter(VideoCore::QueryType type) override; |     void ResetCounter(VideoCore::QueryType type) override; | ||||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|  | @ -118,36 +116,11 @@ public: | ||||||
|         return num_queued_commands > 0; |         return num_queued_commands > 0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { |  | ||||||
|         return async_shaders; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { |  | ||||||
|         return async_shaders; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     static constexpr size_t MAX_TEXTURES = 192; |     static constexpr size_t MAX_TEXTURES = 192; | ||||||
|     static constexpr size_t MAX_IMAGES = 48; |     static constexpr size_t MAX_IMAGES = 48; | ||||||
|     static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; |     static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; | ||||||
| 
 | 
 | ||||||
|     void BindComputeTextures(Shader* kernel); |  | ||||||
| 
 |  | ||||||
|     void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, |  | ||||||
|                       size_t& image_view_index, size_t& texture_index, size_t& image_index); |  | ||||||
| 
 |  | ||||||
|     /// Configures the current textures to use for the draw command.
 |  | ||||||
|     void SetupDrawTextures(const Shader* shader, size_t stage_index); |  | ||||||
| 
 |  | ||||||
|     /// Configures the textures used in a compute shader.
 |  | ||||||
|     void SetupComputeTextures(const Shader* kernel); |  | ||||||
| 
 |  | ||||||
|     /// Configures images in a graphics shader.
 |  | ||||||
|     void SetupDrawImages(const Shader* shader, size_t stage_index); |  | ||||||
| 
 |  | ||||||
|     /// Configures images in a compute shader.
 |  | ||||||
|     void SetupComputeImages(const Shader* shader); |  | ||||||
| 
 |  | ||||||
|     /// Syncs state to match guest's
 |     /// Syncs state to match guest's
 | ||||||
|     void SyncState(); |     void SyncState(); | ||||||
| 
 | 
 | ||||||
|  | @ -230,8 +203,6 @@ private: | ||||||
|     /// End a transform feedback
 |     /// End a transform feedback
 | ||||||
|     void EndTransformFeedback(); |     void EndTransformFeedback(); | ||||||
| 
 | 
 | ||||||
|     void SetupShaders(bool is_indexed); |  | ||||||
| 
 |  | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|     Tegra::Engines::Maxwell3D& maxwell3d; |     Tegra::Engines::Maxwell3D& maxwell3d; | ||||||
|     Tegra::Engines::KeplerCompute& kepler_compute; |     Tegra::Engines::KeplerCompute& kepler_compute; | ||||||
|  | @ -251,8 +222,6 @@ private: | ||||||
|     AccelerateDMA accelerate_dma; |     AccelerateDMA accelerate_dma; | ||||||
|     FenceManagerOpenGL fence_manager; |     FenceManagerOpenGL fence_manager; | ||||||
| 
 | 
 | ||||||
|     VideoCommon::Shader::AsyncShaders async_shaders; |  | ||||||
| 
 |  | ||||||
|     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | ||||||
|     std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |     std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | ||||||
|     boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |     boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | ||||||
|  |  | ||||||
|  | @ -20,307 +20,19 @@ | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_arb_decompiler.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_cache.h" | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
| #include "video_core/shader_notify.h" | #include "video_core/shader_notify.h" | ||||||
| 
 | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| using Tegra::Engines::ShaderType; | Shader::Shader() = default; | ||||||
| using VideoCommon::Shader::GetShaderAddress; |  | ||||||
| using VideoCommon::Shader::GetShaderCode; |  | ||||||
| using VideoCommon::Shader::GetUniqueIdentifier; |  | ||||||
| using VideoCommon::Shader::KERNEL_MAIN_OFFSET; |  | ||||||
| using VideoCommon::Shader::ProgramCode; |  | ||||||
| using VideoCommon::Shader::Registry; |  | ||||||
| using VideoCommon::Shader::ShaderIR; |  | ||||||
| using VideoCommon::Shader::STAGE_MAIN_OFFSET; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; |  | ||||||
| 
 |  | ||||||
| /// Gets the shader type from a Maxwell program type
 |  | ||||||
| constexpr GLenum GetGLShaderType(ShaderType shader_type) { |  | ||||||
|     switch (shader_type) { |  | ||||||
|     case ShaderType::Vertex: |  | ||||||
|         return GL_VERTEX_SHADER; |  | ||||||
|     case ShaderType::Geometry: |  | ||||||
|         return GL_GEOMETRY_SHADER; |  | ||||||
|     case ShaderType::Fragment: |  | ||||||
|         return GL_FRAGMENT_SHADER; |  | ||||||
|     case ShaderType::Compute: |  | ||||||
|         return GL_COMPUTE_SHADER; |  | ||||||
|     default: |  | ||||||
|         return GL_NONE; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| constexpr const char* GetShaderTypeName(ShaderType shader_type) { |  | ||||||
|     switch (shader_type) { |  | ||||||
|     case ShaderType::Vertex: |  | ||||||
|         return "VS"; |  | ||||||
|     case ShaderType::TesselationControl: |  | ||||||
|         return "HS"; |  | ||||||
|     case ShaderType::TesselationEval: |  | ||||||
|         return "DS"; |  | ||||||
|     case ShaderType::Geometry: |  | ||||||
|         return "GS"; |  | ||||||
|     case ShaderType::Fragment: |  | ||||||
|         return "FS"; |  | ||||||
|     case ShaderType::Compute: |  | ||||||
|         return "CS"; |  | ||||||
|     } |  | ||||||
|     return "UNK"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { |  | ||||||
|     switch (program_type) { |  | ||||||
|     case Maxwell::ShaderProgram::VertexA: |  | ||||||
|     case Maxwell::ShaderProgram::VertexB: |  | ||||||
|         return ShaderType::Vertex; |  | ||||||
|     case Maxwell::ShaderProgram::TesselationControl: |  | ||||||
|         return ShaderType::TesselationControl; |  | ||||||
|     case Maxwell::ShaderProgram::TesselationEval: |  | ||||||
|         return ShaderType::TesselationEval; |  | ||||||
|     case Maxwell::ShaderProgram::Geometry: |  | ||||||
|         return ShaderType::Geometry; |  | ||||||
|     case Maxwell::ShaderProgram::Fragment: |  | ||||||
|         return ShaderType::Fragment; |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| constexpr GLenum AssemblyEnum(ShaderType shader_type) { |  | ||||||
|     switch (shader_type) { |  | ||||||
|     case ShaderType::Vertex: |  | ||||||
|         return GL_VERTEX_PROGRAM_NV; |  | ||||||
|     case ShaderType::TesselationControl: |  | ||||||
|         return GL_TESS_CONTROL_PROGRAM_NV; |  | ||||||
|     case ShaderType::TesselationEval: |  | ||||||
|         return GL_TESS_EVALUATION_PROGRAM_NV; |  | ||||||
|     case ShaderType::Geometry: |  | ||||||
|         return GL_GEOMETRY_PROGRAM_NV; |  | ||||||
|     case ShaderType::Fragment: |  | ||||||
|         return GL_FRAGMENT_PROGRAM_NV; |  | ||||||
|     case ShaderType::Compute: |  | ||||||
|         return GL_COMPUTE_PROGRAM_NV; |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |  | ||||||
|     return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { |  | ||||||
|     const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |  | ||||||
|     const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, |  | ||||||
|                                                            entry.graphics_info, entry.compute_info}; |  | ||||||
|     auto registry = std::make_shared<Registry>(entry.type, info); |  | ||||||
|     for (const auto& [address, value] : entry.keys) { |  | ||||||
|         const auto [buffer, offset] = address; |  | ||||||
|         registry->InsertKey(buffer, offset, value); |  | ||||||
|     } |  | ||||||
|     for (const auto& [offset, sampler] : entry.bound_samplers) { |  | ||||||
|         registry->InsertBoundSampler(offset, sampler); |  | ||||||
|     } |  | ||||||
|     for (const auto& [key, sampler] : entry.bindless_samplers) { |  | ||||||
|         const auto [buffer, offset] = key; |  | ||||||
|         registry->InsertBindlessSampler(buffer, offset, sampler); |  | ||||||
|     } |  | ||||||
|     return registry; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::unordered_set<GLenum> GetSupportedFormats() { |  | ||||||
|     GLint num_formats; |  | ||||||
|     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |  | ||||||
| 
 |  | ||||||
|     std::vector<GLint> formats(num_formats); |  | ||||||
|     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); |  | ||||||
| 
 |  | ||||||
|     std::unordered_set<GLenum> supported_formats; |  | ||||||
|     for (const GLint format : formats) { |  | ||||||
|         supported_formats.insert(static_cast<GLenum>(format)); |  | ||||||
|     } |  | ||||||
|     return supported_formats; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, |  | ||||||
|                              const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { |  | ||||||
|     if (device.UseDriverCache()) { |  | ||||||
|         // Ignore hint retrievable if we are using the driver cache
 |  | ||||||
|         hint_retrievable = false; |  | ||||||
|     } |  | ||||||
|     const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |  | ||||||
|     LOG_INFO(Render_OpenGL, "{}", shader_id); |  | ||||||
| 
 |  | ||||||
|     auto program = std::make_shared<ProgramHandle>(); |  | ||||||
| 
 |  | ||||||
|     if (device.UseAssemblyShaders()) { |  | ||||||
|         const std::string arb = |  | ||||||
|             DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); |  | ||||||
| 
 |  | ||||||
|         GLuint& arb_prog = program->assembly_program.handle; |  | ||||||
| 
 |  | ||||||
| // Commented out functions signal OpenGL errors but are compatible with apitrace.
 |  | ||||||
| // Use them only to capture and replay on apitrace.
 |  | ||||||
| #if 0 |  | ||||||
|         glGenProgramsNV(1, &arb_prog); |  | ||||||
|         glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), |  | ||||||
|                         reinterpret_cast<const GLubyte*>(arb.data())); |  | ||||||
| #else |  | ||||||
|         glGenProgramsARB(1, &arb_prog); |  | ||||||
|         glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, |  | ||||||
|                                 static_cast<GLsizei>(arb.size()), arb.data()); |  | ||||||
| #endif |  | ||||||
|         const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); |  | ||||||
|         if (err && *err) { |  | ||||||
|             LOG_CRITICAL(Render_OpenGL, "{}", err); |  | ||||||
|             LOG_INFO(Render_OpenGL, "\n{}", arb); |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); |  | ||||||
|         OGLShader shader; |  | ||||||
|         shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |  | ||||||
| 
 |  | ||||||
|         program->source_program.Create(true, hint_retrievable, shader.handle); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return program; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_, |  | ||||||
|                ProgramSharedPtr program_, bool is_built_) |  | ||||||
|     : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, |  | ||||||
|       is_built{is_built_} { |  | ||||||
|     handle = program->assembly_program.handle; |  | ||||||
|     if (handle == 0) { |  | ||||||
|         handle = program->source_program.handle; |  | ||||||
|     } |  | ||||||
|     if (is_built) { |  | ||||||
|         ASSERT(handle != 0); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| Shader::~Shader() = default; | Shader::~Shader() = default; | ||||||
| 
 | 
 | ||||||
| GLuint Shader::GetHandle() const { |  | ||||||
|     DEBUG_ASSERT(registry->IsConsistent()); |  | ||||||
|     return handle; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool Shader::IsBuilt() const { |  | ||||||
|     return is_built; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { |  | ||||||
|     program->source_program = std::move(new_program); |  | ||||||
|     handle = program->source_program.handle; |  | ||||||
|     is_built = true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { |  | ||||||
|     program->assembly_program = std::move(new_program); |  | ||||||
|     handle = program->assembly_program.handle; |  | ||||||
|     is_built = true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::unique_ptr<Shader> Shader::CreateStageFromMemory( |  | ||||||
|     const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, |  | ||||||
|     ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { |  | ||||||
|     const auto shader_type = GetShaderType(program_type); |  | ||||||
| 
 |  | ||||||
|     auto& gpu = params.gpu; |  | ||||||
|     gpu.ShaderNotify().MarkSharderBuilding(); |  | ||||||
| 
 |  | ||||||
|     auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); |  | ||||||
|     if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { |  | ||||||
|         const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |  | ||||||
|         // TODO(Rodrigo): Handle VertexA shaders
 |  | ||||||
|         // std::optional<ShaderIR> ir_b;
 |  | ||||||
|         // if (!code_b.empty()) {
 |  | ||||||
|         //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
 |  | ||||||
|         // }
 |  | ||||||
|         auto program = |  | ||||||
|             BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |  | ||||||
|         ShaderDiskCacheEntry entry; |  | ||||||
|         entry.type = shader_type; |  | ||||||
|         entry.code = std::move(code); |  | ||||||
|         entry.code_b = std::move(code_b); |  | ||||||
|         entry.unique_identifier = params.unique_identifier; |  | ||||||
|         entry.bound_buffer = registry->GetBoundBuffer(); |  | ||||||
|         entry.graphics_info = registry->GetGraphicsInfo(); |  | ||||||
|         entry.keys = registry->GetKeys(); |  | ||||||
|         entry.bound_samplers = registry->GetBoundSamplers(); |  | ||||||
|         entry.bindless_samplers = registry->GetBindlessSamplers(); |  | ||||||
|         params.disk_cache.SaveEntry(std::move(entry)); |  | ||||||
| 
 |  | ||||||
|         gpu.ShaderNotify().MarkShaderComplete(); |  | ||||||
| 
 |  | ||||||
|         return std::unique_ptr<Shader>(new Shader(std::move(registry), |  | ||||||
|                                                   MakeEntries(params.device, ir, shader_type), |  | ||||||
|                                                   std::move(program), true)); |  | ||||||
|     } else { |  | ||||||
|         // Required for entries
 |  | ||||||
|         const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |  | ||||||
|         auto entries = MakeEntries(params.device, ir, shader_type); |  | ||||||
| 
 |  | ||||||
|         async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, |  | ||||||
|                                         std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, |  | ||||||
|                                         COMPILER_SETTINGS, *registry, cpu_addr); |  | ||||||
| 
 |  | ||||||
|         auto program = std::make_shared<ProgramHandle>(); |  | ||||||
|         return std::unique_ptr<Shader>( |  | ||||||
|             new Shader(std::move(registry), std::move(entries), std::move(program), false)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, |  | ||||||
|                                                        ProgramCode code) { |  | ||||||
|     auto& gpu = params.gpu; |  | ||||||
|     gpu.ShaderNotify().MarkSharderBuilding(); |  | ||||||
| 
 |  | ||||||
|     auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); |  | ||||||
|     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |  | ||||||
|     const u64 uid = params.unique_identifier; |  | ||||||
|     auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); |  | ||||||
| 
 |  | ||||||
|     ShaderDiskCacheEntry entry; |  | ||||||
|     entry.type = ShaderType::Compute; |  | ||||||
|     entry.code = std::move(code); |  | ||||||
|     entry.unique_identifier = uid; |  | ||||||
|     entry.bound_buffer = registry->GetBoundBuffer(); |  | ||||||
|     entry.compute_info = registry->GetComputeInfo(); |  | ||||||
|     entry.keys = registry->GetKeys(); |  | ||||||
|     entry.bound_samplers = registry->GetBoundSamplers(); |  | ||||||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); |  | ||||||
|     params.disk_cache.SaveEntry(std::move(entry)); |  | ||||||
| 
 |  | ||||||
|     gpu.ShaderNotify().MarkShaderComplete(); |  | ||||||
| 
 |  | ||||||
|     return std::unique_ptr<Shader>(new Shader(std::move(registry), |  | ||||||
|                                               MakeEntries(params.device, ir, ShaderType::Compute), |  | ||||||
|                                               std::move(program))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, |  | ||||||
|                                                 const PrecompiledShader& precompiled_shader) { |  | ||||||
|     return std::unique_ptr<Shader>(new Shader( |  | ||||||
|         precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | ||||||
|                                      Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |                                      Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|                                      Tegra::Engines::Maxwell3D& maxwell3d_, |                                      Tegra::Engines::Maxwell3D& maxwell3d_, | ||||||
|  | @ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, | ||||||
| 
 | 
 | ||||||
| ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | ||||||
| 
 | 
 | ||||||
| void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, |  | ||||||
|                                       const VideoCore::DiskResourceLoadCallback& callback) { |  | ||||||
|     disk_cache.BindTitleID(title_id); |  | ||||||
|     const std::optional transferable = disk_cache.LoadTransferable(); |  | ||||||
| 
 |  | ||||||
|     LOG_INFO(Render_OpenGL, "Total Shader Count: {}", |  | ||||||
|              transferable.has_value() ? transferable->size() : 0); |  | ||||||
| 
 |  | ||||||
|     if (!transferable) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<ShaderDiskCachePrecompiled> gl_cache; |  | ||||||
|     if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { |  | ||||||
|         // Only load precompiled cache when we are not using assembly shaders
 |  | ||||||
|         gl_cache = disk_cache.LoadPrecompiled(); |  | ||||||
|     } |  | ||||||
|     const auto supported_formats = GetSupportedFormats(); |  | ||||||
| 
 |  | ||||||
|     // Track if precompiled cache was altered during loading to know if we have to
 |  | ||||||
|     // serialize the virtual precompiled cache file back to the hard drive
 |  | ||||||
|     bool precompiled_cache_altered = false; |  | ||||||
| 
 |  | ||||||
|     // Inform the frontend about shader build initialization
 |  | ||||||
|     if (callback) { |  | ||||||
|         callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::mutex mutex; |  | ||||||
|     std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
 |  | ||||||
|     std::atomic_bool gl_cache_failed = false; |  | ||||||
| 
 |  | ||||||
|     const auto find_precompiled = [&gl_cache](u64 id) { |  | ||||||
|         return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, |  | ||||||
|                             std::size_t end) { |  | ||||||
|         const auto scope = context->Acquire(); |  | ||||||
| 
 |  | ||||||
|         for (std::size_t i = begin; i < end; ++i) { |  | ||||||
|             if (stop_loading.stop_requested()) { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|             const auto& entry = (*transferable)[i]; |  | ||||||
|             const u64 uid = entry.unique_identifier; |  | ||||||
|             const auto it = find_precompiled(uid); |  | ||||||
|             const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; |  | ||||||
| 
 |  | ||||||
|             const bool is_compute = entry.type == ShaderType::Compute; |  | ||||||
|             const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |  | ||||||
|             auto registry = MakeRegistry(entry); |  | ||||||
|             const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |  | ||||||
| 
 |  | ||||||
|             ProgramSharedPtr program; |  | ||||||
|             if (precompiled_entry) { |  | ||||||
|                 // If the shader is precompiled, attempt to load it with
 |  | ||||||
|                 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); |  | ||||||
|                 if (!program) { |  | ||||||
|                     gl_cache_failed = true; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             if (!program) { |  | ||||||
|                 // Otherwise compile it from GLSL
 |  | ||||||
|                 program = BuildShader(device, entry.type, uid, ir, *registry, true); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             PrecompiledShader shader; |  | ||||||
|             shader.program = std::move(program); |  | ||||||
|             shader.registry = std::move(registry); |  | ||||||
|             shader.entries = MakeEntries(device, ir, entry.type); |  | ||||||
| 
 |  | ||||||
|             std::scoped_lock lock{mutex}; |  | ||||||
|             if (callback) { |  | ||||||
|                 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |  | ||||||
|                          transferable->size()); |  | ||||||
|             } |  | ||||||
|             runtime_cache.emplace(entry.unique_identifier, std::move(shader)); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; |  | ||||||
|     const std::size_t bucket_size{transferable->size() / num_workers}; |  | ||||||
|     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |  | ||||||
|     std::vector<std::thread> threads(num_workers); |  | ||||||
|     for (std::size_t i = 0; i < num_workers; ++i) { |  | ||||||
|         const bool is_last_worker = i + 1 == num_workers; |  | ||||||
|         const std::size_t start{bucket_size * i}; |  | ||||||
|         const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; |  | ||||||
| 
 |  | ||||||
|         // On some platforms the shared context has to be created from the GUI thread
 |  | ||||||
|         contexts[i] = emu_window.CreateSharedContext(); |  | ||||||
|         threads[i] = std::thread(worker, contexts[i].get(), start, end); |  | ||||||
|     } |  | ||||||
|     for (auto& thread : threads) { |  | ||||||
|         thread.join(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (gl_cache_failed) { |  | ||||||
|         // Invalidate the precompiled cache if a shader dumped shader was rejected
 |  | ||||||
|         disk_cache.InvalidatePrecompiled(); |  | ||||||
|         precompiled_cache_altered = true; |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     if (stop_loading.stop_requested()) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (device.UseAssemblyShaders() || device.UseDriverCache()) { |  | ||||||
|         // Don't store precompiled binaries for assembly shaders or when using the driver cache
 |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
 |  | ||||||
|     // before precompiling them
 |  | ||||||
| 
 |  | ||||||
|     for (std::size_t i = 0; i < transferable->size(); ++i) { |  | ||||||
|         const u64 id = (*transferable)[i].unique_identifier; |  | ||||||
|         const auto it = find_precompiled(id); |  | ||||||
|         if (it == gl_cache.end()) { |  | ||||||
|             const GLuint program = runtime_cache.at(id).program->source_program.handle; |  | ||||||
|             disk_cache.SavePrecompiled(id, program); |  | ||||||
|             precompiled_cache_altered = true; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (precompiled_cache_altered) { |  | ||||||
|         disk_cache.SaveVirtualPrecompiledFile(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( |  | ||||||
|     const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |  | ||||||
|     const std::unordered_set<GLenum>& supported_formats) { |  | ||||||
|     if (!supported_formats.contains(precompiled_entry.binary_format)) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     auto program = std::make_shared<ProgramHandle>(); |  | ||||||
|     GLuint& handle = program->source_program.handle; |  | ||||||
|     handle = glCreateProgram(); |  | ||||||
|     glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |  | ||||||
|     glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), |  | ||||||
|                     static_cast<GLsizei>(precompiled_entry.binary.size())); |  | ||||||
| 
 |  | ||||||
|     GLint link_status; |  | ||||||
|     glGetProgramiv(handle, GL_LINK_STATUS, &link_status); |  | ||||||
|     if (link_status == GL_FALSE) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return program; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, |  | ||||||
|                                            VideoCommon::Shader::AsyncShaders& async_shaders) { |  | ||||||
|     if (!maxwell3d.dirty.flags[Dirty::Shaders]) { |  | ||||||
|         auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; |  | ||||||
|         if (last_shader->IsBuilt()) { |  | ||||||
|             return last_shader; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; |  | ||||||
| 
 |  | ||||||
|     if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { |  | ||||||
|         auto completed_work = async_shaders.GetCompletedWork(); |  | ||||||
|         for (auto& work : completed_work) { |  | ||||||
|             Shader* shader = TryGet(work.cpu_address); |  | ||||||
|             gpu.ShaderNotify().MarkShaderComplete(); |  | ||||||
|             if (shader == nullptr) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             using namespace VideoCommon::Shader; |  | ||||||
|             if (work.backend == AsyncShaders::Backend::OpenGL) { |  | ||||||
|                 shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); |  | ||||||
|             } else if (work.backend == AsyncShaders::Backend::GLASM) { |  | ||||||
|                 shader->AsyncGLASMBuilt(std::move(work.program.glasm)); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             auto& registry = shader->GetRegistry(); |  | ||||||
| 
 |  | ||||||
|             ShaderDiskCacheEntry entry; |  | ||||||
|             entry.type = work.shader_type; |  | ||||||
|             entry.code = std::move(work.code); |  | ||||||
|             entry.code_b = std::move(work.code_b); |  | ||||||
|             entry.unique_identifier = work.uid; |  | ||||||
|             entry.bound_buffer = registry.GetBoundBuffer(); |  | ||||||
|             entry.graphics_info = registry.GetGraphicsInfo(); |  | ||||||
|             entry.keys = registry.GetKeys(); |  | ||||||
|             entry.bound_samplers = registry.GetBoundSamplers(); |  | ||||||
|             entry.bindless_samplers = registry.GetBindlessSamplers(); |  | ||||||
|             disk_cache.SaveEntry(std::move(entry)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Look up shader in the cache based on address
 |  | ||||||
|     const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; |  | ||||||
|     if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { |  | ||||||
|         return last_shaders[static_cast<std::size_t>(program)] = shader; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u8* const host_ptr{gpu_memory.GetPointer(address)}; |  | ||||||
| 
 |  | ||||||
|     // No shader found - create a new one
 |  | ||||||
|     ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; |  | ||||||
|     ProgramCode code_b; |  | ||||||
|     if (program == Maxwell::ShaderProgram::VertexA) { |  | ||||||
|         const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; |  | ||||||
|         const u8* host_ptr_b = gpu_memory.GetPointer(address_b); |  | ||||||
|         code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); |  | ||||||
|     } |  | ||||||
|     const std::size_t code_size = code.size() * sizeof(u64); |  | ||||||
| 
 |  | ||||||
|     const u64 unique_identifier = GetUniqueIdentifier( |  | ||||||
|         GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |  | ||||||
| 
 |  | ||||||
|     const ShaderParameters params{gpu,       maxwell3d, disk_cache,       device, |  | ||||||
|                                   *cpu_addr, host_ptr,  unique_identifier}; |  | ||||||
| 
 |  | ||||||
|     std::unique_ptr<Shader> shader; |  | ||||||
|     const auto found = runtime_cache.find(unique_identifier); |  | ||||||
|     if (found == runtime_cache.end()) { |  | ||||||
|         shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), |  | ||||||
|                                                async_shaders, cpu_addr.value_or(0)); |  | ||||||
|     } else { |  | ||||||
|         shader = Shader::CreateFromCache(params, found->second); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Shader* const result = shader.get(); |  | ||||||
|     if (cpu_addr) { |  | ||||||
|         Register(std::move(shader), *cpu_addr, code_size); |  | ||||||
|     } else { |  | ||||||
|         null_shader = std::move(shader); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return last_shaders[static_cast<std::size_t>(program)] = result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { |  | ||||||
|     const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; |  | ||||||
| 
 |  | ||||||
|     if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { |  | ||||||
|         return kernel; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // No kernel found, create a new one
 |  | ||||||
|     const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; |  | ||||||
|     ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; |  | ||||||
|     const std::size_t code_size{code.size() * sizeof(u64)}; |  | ||||||
|     const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |  | ||||||
| 
 |  | ||||||
|     const ShaderParameters params{gpu,       kepler_compute, disk_cache,       device, |  | ||||||
|                                   *cpu_addr, host_ptr,       unique_identifier}; |  | ||||||
| 
 |  | ||||||
|     std::unique_ptr<Shader> kernel; |  | ||||||
|     const auto found = runtime_cache.find(unique_identifier); |  | ||||||
|     if (found == runtime_cache.end()) { |  | ||||||
|         kernel = Shader::CreateKernelFromMemory(params, std::move(code)); |  | ||||||
|     } else { |  | ||||||
|         kernel = Shader::CreateFromCache(params, found->second); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Shader* const result = kernel.get(); |  | ||||||
|     if (cpu_addr) { |  | ||||||
|         Register(std::move(kernel), *cpu_addr, code_size); |  | ||||||
|     } else { |  | ||||||
|         null_kernel = std::move(kernel); |  | ||||||
|     } |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -19,10 +19,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -33,10 +29,6 @@ namespace Core::Frontend { | ||||||
| class EmuWindow; | class EmuWindow; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| class AsyncShaders; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| class Device; | class Device; | ||||||
|  | @ -44,77 +36,10 @@ class RasterizerOpenGL; | ||||||
| 
 | 
 | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
| 
 | 
 | ||||||
| struct ProgramHandle { | class Shader { | ||||||
|     OGLProgram source_program; |  | ||||||
|     OGLAssemblyProgram assembly_program; |  | ||||||
| }; |  | ||||||
| using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; |  | ||||||
| 
 |  | ||||||
| struct PrecompiledShader { |  | ||||||
|     ProgramSharedPtr program; |  | ||||||
|     std::shared_ptr<VideoCommon::Shader::Registry> registry; |  | ||||||
|     ShaderEntries entries; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct ShaderParameters { |  | ||||||
|     Tegra::GPU& gpu; |  | ||||||
|     Tegra::Engines::ConstBufferEngineInterface& engine; |  | ||||||
|     ShaderDiskCacheOpenGL& disk_cache; |  | ||||||
|     const Device& device; |  | ||||||
|     VAddr cpu_addr; |  | ||||||
|     const u8* host_ptr; |  | ||||||
|     u64 unique_identifier; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, |  | ||||||
|                              u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, |  | ||||||
|                              const VideoCommon::Shader::Registry& registry, |  | ||||||
|                              bool hint_retrievable = false); |  | ||||||
| 
 |  | ||||||
| class Shader final { |  | ||||||
| public: | public: | ||||||
|  |     explicit Shader(); | ||||||
|     ~Shader(); |     ~Shader(); | ||||||
| 
 |  | ||||||
|     /// Gets the GL program handle for the shader
 |  | ||||||
|     GLuint GetHandle() const; |  | ||||||
| 
 |  | ||||||
|     bool IsBuilt() const; |  | ||||||
| 
 |  | ||||||
|     /// Gets the shader entries for the shader
 |  | ||||||
|     const ShaderEntries& GetEntries() const { |  | ||||||
|         return entries; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VideoCommon::Shader::Registry& GetRegistry() const { |  | ||||||
|         return *registry; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Mark an OpenGL shader as built
 |  | ||||||
|     void AsyncOpenGLBuilt(OGLProgram new_program); |  | ||||||
| 
 |  | ||||||
|     /// Mark a GLASM shader as built
 |  | ||||||
|     void AsyncGLASMBuilt(OGLAssemblyProgram new_program); |  | ||||||
| 
 |  | ||||||
|     static std::unique_ptr<Shader> CreateStageFromMemory( |  | ||||||
|         const ShaderParameters& params, Maxwell::ShaderProgram program_type, |  | ||||||
|         ProgramCode program_code, ProgramCode program_code_b, |  | ||||||
|         VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); |  | ||||||
| 
 |  | ||||||
|     static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, |  | ||||||
|                                                           ProgramCode code); |  | ||||||
| 
 |  | ||||||
|     static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, |  | ||||||
|                                                    const PrecompiledShader& precompiled_shader); |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, |  | ||||||
|                     ProgramSharedPtr program, bool is_built_ = true); |  | ||||||
| 
 |  | ||||||
|     std::shared_ptr<VideoCommon::Shader::Registry> registry; |  | ||||||
|     ShaderEntries entries; |  | ||||||
|     ProgramSharedPtr program; |  | ||||||
|     GLuint handle = 0; |  | ||||||
|     bool is_built{}; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | ||||||
|  | @ -126,36 +51,13 @@ public: | ||||||
|                                Tegra::MemoryManager& gpu_memory_, const Device& device_); |                                Tegra::MemoryManager& gpu_memory_, const Device& device_); | ||||||
|     ~ShaderCacheOpenGL() override; |     ~ShaderCacheOpenGL() override; | ||||||
| 
 | 
 | ||||||
|     /// Loads disk cache for the current game
 |  | ||||||
|     void LoadDiskCache(u64 title_id, std::stop_token stop_loading, |  | ||||||
|                        const VideoCore::DiskResourceLoadCallback& callback); |  | ||||||
| 
 |  | ||||||
|     /// Gets the current specified shader stage program
 |  | ||||||
|     Shader* GetStageProgram(Maxwell::ShaderProgram program, |  | ||||||
|                             VideoCommon::Shader::AsyncShaders& async_shaders); |  | ||||||
| 
 |  | ||||||
|     /// Gets a compute kernel in the passed address
 |  | ||||||
|     Shader* GetComputeKernel(GPUVAddr code_addr); |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     ProgramSharedPtr GeneratePrecompiledProgram( |  | ||||||
|         const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |  | ||||||
|         const std::unordered_set<GLenum>& supported_formats); |  | ||||||
| 
 |  | ||||||
|     Core::Frontend::EmuWindow& emu_window; |     Core::Frontend::EmuWindow& emu_window; | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|     Tegra::MemoryManager& gpu_memory; |     Tegra::MemoryManager& gpu_memory; | ||||||
|     Tegra::Engines::Maxwell3D& maxwell3d; |     Tegra::Engines::Maxwell3D& maxwell3d; | ||||||
|     Tegra::Engines::KeplerCompute& kepler_compute; |     Tegra::Engines::KeplerCompute& kepler_compute; | ||||||
|     const Device& device; |     const Device& device; | ||||||
| 
 |  | ||||||
|     ShaderDiskCacheOpenGL disk_cache; |  | ||||||
|     std::unordered_map<u64, PrecompiledShader> runtime_cache; |  | ||||||
| 
 |  | ||||||
|     std::unique_ptr<Shader> null_shader; |  | ||||||
|     std::unique_ptr<Shader> null_kernel; |  | ||||||
| 
 |  | ||||||
|     std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -1,69 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <string> |  | ||||||
| #include <string_view> |  | ||||||
| #include <utility> |  | ||||||
| #include <vector> |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { |  | ||||||
| 
 |  | ||||||
| class Device; |  | ||||||
| 
 |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; |  | ||||||
| using SamplerEntry = VideoCommon::Shader::SamplerEntry; |  | ||||||
| using ImageEntry = VideoCommon::Shader::ImageEntry; |  | ||||||
| 
 |  | ||||||
| class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { |  | ||||||
| public: |  | ||||||
|     explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) |  | ||||||
|         : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} |  | ||||||
| 
 |  | ||||||
|     u32 GetIndex() const { |  | ||||||
|         return index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 index = 0; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct GlobalMemoryEntry { |  | ||||||
|     constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, |  | ||||||
|                                          bool is_written_) |  | ||||||
|         : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ |  | ||||||
|                                                                                      is_written_} {} |  | ||||||
| 
 |  | ||||||
|     u32 cbuf_index = 0; |  | ||||||
|     u32 cbuf_offset = 0; |  | ||||||
|     bool is_read = false; |  | ||||||
|     bool is_written = false; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct ShaderEntries { |  | ||||||
|     std::vector<ConstBufferEntry> const_buffers; |  | ||||||
|     std::vector<GlobalMemoryEntry> global_memory_entries; |  | ||||||
|     std::vector<SamplerEntry> samplers; |  | ||||||
|     std::vector<ImageEntry> images; |  | ||||||
|     std::size_t shader_length{}; |  | ||||||
|     u32 clip_distances{}; |  | ||||||
|     u32 enabled_uniform_buffers{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |  | ||||||
|                           Tegra::Engines::ShaderType stage); |  | ||||||
| 
 |  | ||||||
| std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |  | ||||||
|                             const VideoCommon::Shader::Registry& registry, |  | ||||||
|                             Tegra::Engines::ShaderType stage, std::string_view identifier, |  | ||||||
|                             std::string_view suffix = {}); |  | ||||||
| 
 |  | ||||||
| } // namespace OpenGL
 |  | ||||||
|  | @ -1,482 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <cstring> |  | ||||||
| 
 |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/fs/file.h" |  | ||||||
| #include "common/fs/fs.h" |  | ||||||
| #include "common/fs/path_util.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "common/scm_rev.h" |  | ||||||
| #include "common/settings.h" |  | ||||||
| #include "common/zstd_compression.h" |  | ||||||
| #include "core/core.h" |  | ||||||
| #include "core/hle/kernel/k_process.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_cache.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { |  | ||||||
| 
 |  | ||||||
| using Tegra::Engines::ShaderType; |  | ||||||
| using VideoCommon::Shader::BindlessSamplerMap; |  | ||||||
| using VideoCommon::Shader::BoundSamplerMap; |  | ||||||
| using VideoCommon::Shader::KeyMap; |  | ||||||
| using VideoCommon::Shader::SeparateSamplerKey; |  | ||||||
| using ShaderCacheVersionHash = std::array<u8, 64>; |  | ||||||
| 
 |  | ||||||
| struct ConstBufferKey { |  | ||||||
|     u32 cbuf = 0; |  | ||||||
|     u32 offset = 0; |  | ||||||
|     u32 value = 0; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct BoundSamplerEntry { |  | ||||||
|     u32 offset = 0; |  | ||||||
|     Tegra::Engines::SamplerDescriptor sampler; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct SeparateSamplerEntry { |  | ||||||
|     u32 cbuf1 = 0; |  | ||||||
|     u32 cbuf2 = 0; |  | ||||||
|     u32 offset1 = 0; |  | ||||||
|     u32 offset2 = 0; |  | ||||||
|     Tegra::Engines::SamplerDescriptor sampler; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct BindlessSamplerEntry { |  | ||||||
|     u32 cbuf = 0; |  | ||||||
|     u32 offset = 0; |  | ||||||
|     Tegra::Engines::SamplerDescriptor sampler; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| constexpr u32 NativeVersion = 21; |  | ||||||
| 
 |  | ||||||
| ShaderCacheVersionHash GetShaderCacheVersionHash() { |  | ||||||
|     ShaderCacheVersionHash hash{}; |  | ||||||
|     const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); |  | ||||||
|     std::memcpy(hash.data(), Common::g_shader_cache_version, length); |  | ||||||
|     return hash; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; |  | ||||||
| 
 |  | ||||||
| ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; |  | ||||||
| 
 |  | ||||||
| bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { |  | ||||||
|     if (!file.ReadObject(type)) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     u32 code_size; |  | ||||||
|     u32 code_size_b; |  | ||||||
|     if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     code.resize(code_size); |  | ||||||
|     code_b.resize(code_size_b); |  | ||||||
|     if (file.Read(code) != code_size) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     if (HasProgramA() && file.Read(code_b) != code_size_b) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u8 is_texture_handler_size_known; |  | ||||||
|     u32 texture_handler_size_value; |  | ||||||
|     u32 num_keys; |  | ||||||
|     u32 num_bound_samplers; |  | ||||||
|     u32 num_separate_samplers; |  | ||||||
|     u32 num_bindless_samplers; |  | ||||||
|     if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || |  | ||||||
|         !file.ReadObject(is_texture_handler_size_known) || |  | ||||||
|         !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || |  | ||||||
|         !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || |  | ||||||
|         !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || |  | ||||||
|         !file.ReadObject(num_bindless_samplers)) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     if (is_texture_handler_size_known) { |  | ||||||
|         texture_handler_size = texture_handler_size_value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<ConstBufferKey> flat_keys(num_keys); |  | ||||||
|     std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); |  | ||||||
|     std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); |  | ||||||
|     std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); |  | ||||||
|     if (file.Read(flat_keys) != flat_keys.size() || |  | ||||||
|         file.Read(flat_bound_samplers) != flat_bound_samplers.size() || |  | ||||||
|         file.Read(flat_separate_samplers) != flat_separate_samplers.size() || |  | ||||||
|         file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : flat_keys) { |  | ||||||
|         keys.insert({{entry.cbuf, entry.offset}, entry.value}); |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : flat_bound_samplers) { |  | ||||||
|         bound_samplers.emplace(entry.offset, entry.sampler); |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : flat_separate_samplers) { |  | ||||||
|         SeparateSamplerKey key; |  | ||||||
|         key.buffers = {entry.cbuf1, entry.cbuf2}; |  | ||||||
|         key.offsets = {entry.offset1, entry.offset2}; |  | ||||||
|         separate_samplers.emplace(key, entry.sampler); |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : flat_bindless_samplers) { |  | ||||||
|         bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { |  | ||||||
|     if (!file.WriteObject(static_cast<u32>(type)) || |  | ||||||
|         !file.WriteObject(static_cast<u32>(code.size())) || |  | ||||||
|         !file.WriteObject(static_cast<u32>(code_b.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     if (file.Write(code) != code.size()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     if (HasProgramA() && file.Write(code_b) != code_b.size()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || |  | ||||||
|         !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) || |  | ||||||
|         !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || |  | ||||||
|         !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) || |  | ||||||
|         !file.WriteObject(static_cast<u32>(bound_samplers.size())) || |  | ||||||
|         !file.WriteObject(static_cast<u32>(separate_samplers.size())) || |  | ||||||
|         !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<ConstBufferKey> flat_keys; |  | ||||||
|     flat_keys.reserve(keys.size()); |  | ||||||
|     for (const auto& [address, value] : keys) { |  | ||||||
|         flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<BoundSamplerEntry> flat_bound_samplers; |  | ||||||
|     flat_bound_samplers.reserve(bound_samplers.size()); |  | ||||||
|     for (const auto& [address, sampler] : bound_samplers) { |  | ||||||
|         flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<SeparateSamplerEntry> flat_separate_samplers; |  | ||||||
|     flat_separate_samplers.reserve(separate_samplers.size()); |  | ||||||
|     for (const auto& [key, sampler] : separate_samplers) { |  | ||||||
|         SeparateSamplerEntry entry; |  | ||||||
|         std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; |  | ||||||
|         std::tie(entry.offset1, entry.offset2) = key.offsets; |  | ||||||
|         entry.sampler = sampler; |  | ||||||
|         flat_separate_samplers.push_back(entry); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<BindlessSamplerEntry> flat_bindless_samplers; |  | ||||||
|     flat_bindless_samplers.reserve(bindless_samplers.size()); |  | ||||||
|     for (const auto& [address, sampler] : bindless_samplers) { |  | ||||||
|         flat_bindless_samplers.push_back( |  | ||||||
|             BindlessSamplerEntry{address.first, address.second, sampler}); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return file.Write(flat_keys) == flat_keys.size() && |  | ||||||
|            file.Write(flat_bound_samplers) == flat_bound_samplers.size() && |  | ||||||
|            file.Write(flat_separate_samplers) == flat_separate_samplers.size() && |  | ||||||
|            file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; |  | ||||||
| 
 |  | ||||||
| ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { |  | ||||||
|     title_id = title_id_; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { |  | ||||||
|     // Skip games without title id
 |  | ||||||
|     const bool has_title_id = title_id != 0; |  | ||||||
|     if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, |  | ||||||
|                             Common::FS::FileType::BinaryFile}; |  | ||||||
|     if (!file.IsOpen()) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "No transferable shader cache found"); |  | ||||||
|         is_usable = true; |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 version{}; |  | ||||||
|     if (!file.ReadObject(version)) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (version < NativeVersion) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); |  | ||||||
|         file.Close(); |  | ||||||
|         InvalidateTransferable(); |  | ||||||
|         is_usable = true; |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     if (version > NativeVersion) { |  | ||||||
|         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " |  | ||||||
|                                    "of the emulator, skipping"); |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Version is valid, load the shaders
 |  | ||||||
|     std::vector<ShaderDiskCacheEntry> entries; |  | ||||||
|     while (static_cast<u64>(file.Tell()) < file.GetSize()) { |  | ||||||
|         ShaderDiskCacheEntry& entry = entries.emplace_back(); |  | ||||||
|         if (!entry.Load(file)) { |  | ||||||
|             LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     is_usable = true; |  | ||||||
|     return {std::move(entries)}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { |  | ||||||
|     if (!is_usable) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, |  | ||||||
|                             Common::FS::FileType::BinaryFile}; |  | ||||||
|     if (!file.IsOpen()) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (const auto result = LoadPrecompiledFile(file)) { |  | ||||||
|         return *result; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); |  | ||||||
|     file.Close(); |  | ||||||
|     InvalidatePrecompiled(); |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( |  | ||||||
|     Common::FS::IOFile& file) { |  | ||||||
|     // Read compressed file from disk and decompress to virtual precompiled cache file
 |  | ||||||
|     std::vector<u8> compressed(file.GetSize()); |  | ||||||
|     if (file.Read(compressed) != file.GetSize()) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed); |  | ||||||
|     SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); |  | ||||||
|     precompiled_cache_virtual_file_offset = 0; |  | ||||||
| 
 |  | ||||||
|     ShaderCacheVersionHash file_hash{}; |  | ||||||
|     if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { |  | ||||||
|         precompiled_cache_virtual_file_offset = 0; |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     if (GetShaderCacheVersionHash() != file_hash) { |  | ||||||
|         LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); |  | ||||||
|         precompiled_cache_virtual_file_offset = 0; |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<ShaderDiskCachePrecompiled> entries; |  | ||||||
|     while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |  | ||||||
|         u32 binary_size; |  | ||||||
|         auto& entry = entries.emplace_back(); |  | ||||||
|         if (!LoadObjectFromPrecompiled(entry.unique_identifier) || |  | ||||||
|             !LoadObjectFromPrecompiled(entry.binary_format) || |  | ||||||
|             !LoadObjectFromPrecompiled(binary_size)) { |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         entry.binary.resize(binary_size); |  | ||||||
|         if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return entries; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::InvalidateTransferable() { |  | ||||||
|     if (!Common::FS::RemoveFile(GetTransferablePath())) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", |  | ||||||
|                   Common::FS::PathToUTF8String(GetTransferablePath())); |  | ||||||
|     } |  | ||||||
|     InvalidatePrecompiled(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { |  | ||||||
|     // Clear virtaul precompiled cache file
 |  | ||||||
|     precompiled_cache_virtual_file.Resize(0); |  | ||||||
| 
 |  | ||||||
|     if (!Common::FS::RemoveFile(GetPrecompiledPath())) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", |  | ||||||
|                   Common::FS::PathToUTF8String(GetPrecompiledPath())); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { |  | ||||||
|     if (!is_usable) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u64 id = entry.unique_identifier; |  | ||||||
|     if (stored_transferable.contains(id)) { |  | ||||||
|         // The shader already exists
 |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Common::FS::IOFile file = AppendTransferableFile(); |  | ||||||
|     if (!file.IsOpen()) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     if (!entry.Save(file)) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |  | ||||||
|         file.Close(); |  | ||||||
|         InvalidateTransferable(); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     stored_transferable.insert(id); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { |  | ||||||
|     if (!is_usable) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
 |  | ||||||
|     // when writing the dump. This should be done the moment I get access to write to the virtual
 |  | ||||||
|     // file.
 |  | ||||||
|     if (precompiled_cache_virtual_file.GetSize() == 0) { |  | ||||||
|         SavePrecompiledHeaderToVirtualPrecompiledCache(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     GLint binary_length; |  | ||||||
|     glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |  | ||||||
| 
 |  | ||||||
|     GLenum binary_format; |  | ||||||
|     std::vector<u8> binary(binary_length); |  | ||||||
|     glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |  | ||||||
| 
 |  | ||||||
|     if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || |  | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || |  | ||||||
|         !SaveArrayToPrecompiled(binary.data(), binary.size())) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |  | ||||||
|                   unique_identifier); |  | ||||||
|         InvalidatePrecompiled(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { |  | ||||||
|     if (!EnsureDirectories()) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto transferable_path{GetTransferablePath()}; |  | ||||||
|     const bool existed = Common::FS::Exists(transferable_path); |  | ||||||
| 
 |  | ||||||
|     Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, |  | ||||||
|                             Common::FS::FileType::BinaryFile}; |  | ||||||
|     if (!file.IsOpen()) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", |  | ||||||
|                   Common::FS::PathToUTF8String(transferable_path)); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     if (!existed || file.GetSize() == 0) { |  | ||||||
|         // If the file didn't exist, write its version
 |  | ||||||
|         if (!file.WriteObject(NativeVersion)) { |  | ||||||
|             LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", |  | ||||||
|                       Common::FS::PathToUTF8String(transferable_path)); |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return file; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { |  | ||||||
|     const auto hash{GetShaderCacheVersionHash()}; |  | ||||||
|     if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { |  | ||||||
|         LOG_ERROR( |  | ||||||
|             Render_OpenGL, |  | ||||||
|             "Failed to write precompiled cache version hash to virtual precompiled cache file"); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { |  | ||||||
|     precompiled_cache_virtual_file_offset = 0; |  | ||||||
|     const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); |  | ||||||
|     const std::vector<u8> compressed = |  | ||||||
|         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); |  | ||||||
| 
 |  | ||||||
|     const auto precompiled_path = GetPrecompiledPath(); |  | ||||||
|     Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, |  | ||||||
|                             Common::FS::FileType::BinaryFile}; |  | ||||||
| 
 |  | ||||||
|     if (!file.IsOpen()) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", |  | ||||||
|                   Common::FS::PathToUTF8String(precompiled_path)); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     if (file.Write(compressed) != compressed.size()) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", |  | ||||||
|                   Common::FS::PathToUTF8String(precompiled_path)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ShaderDiskCacheOpenGL::EnsureDirectories() const { |  | ||||||
|     const auto CreateDir = [](const std::filesystem::path& dir) { |  | ||||||
|         if (!Common::FS::CreateDir(dir)) { |  | ||||||
|             LOG_ERROR(Render_OpenGL, "Failed to create directory={}", |  | ||||||
|                       Common::FS::PathToUTF8String(dir)); |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|         return true; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && |  | ||||||
|            CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && |  | ||||||
|            CreateDir(GetPrecompiledDir()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { |  | ||||||
|     return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { |  | ||||||
|     return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { |  | ||||||
|     return GetBaseDir() / "transferable"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { |  | ||||||
|     return GetBaseDir() / "precompiled"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { |  | ||||||
|     return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::string ShaderDiskCacheOpenGL::GetTitleID() const { |  | ||||||
|     return fmt::format("{:016X}", title_id); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace OpenGL
 |  | ||||||
|  | @ -1,176 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <filesystem> |  | ||||||
| #include <optional> |  | ||||||
| #include <string> |  | ||||||
| #include <tuple> |  | ||||||
| #include <type_traits> |  | ||||||
| #include <unordered_map> |  | ||||||
| #include <unordered_set> |  | ||||||
| #include <utility> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include <glad/glad.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "core/file_sys/vfs_vector.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| 
 |  | ||||||
| namespace Common::FS { |  | ||||||
| class IOFile; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { |  | ||||||
| 
 |  | ||||||
| using ProgramCode = std::vector<u64>; |  | ||||||
| 
 |  | ||||||
| /// Describes a shader and how it's used by the guest GPU
 |  | ||||||
| struct ShaderDiskCacheEntry { |  | ||||||
|     ShaderDiskCacheEntry(); |  | ||||||
|     ~ShaderDiskCacheEntry(); |  | ||||||
| 
 |  | ||||||
|     bool Load(Common::FS::IOFile& file); |  | ||||||
| 
 |  | ||||||
|     bool Save(Common::FS::IOFile& file) const; |  | ||||||
| 
 |  | ||||||
|     bool HasProgramA() const { |  | ||||||
|         return !code.empty() && !code_b.empty(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Tegra::Engines::ShaderType type{}; |  | ||||||
|     ProgramCode code; |  | ||||||
|     ProgramCode code_b; |  | ||||||
| 
 |  | ||||||
|     u64 unique_identifier = 0; |  | ||||||
|     std::optional<u32> texture_handler_size; |  | ||||||
|     u32 bound_buffer = 0; |  | ||||||
|     VideoCommon::Shader::GraphicsInfo graphics_info; |  | ||||||
|     VideoCommon::Shader::ComputeInfo compute_info; |  | ||||||
|     VideoCommon::Shader::KeyMap keys; |  | ||||||
|     VideoCommon::Shader::BoundSamplerMap bound_samplers; |  | ||||||
|     VideoCommon::Shader::SeparateSamplerMap separate_samplers; |  | ||||||
|     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Contains an OpenGL dumped binary program
 |  | ||||||
| struct ShaderDiskCachePrecompiled { |  | ||||||
|     u64 unique_identifier = 0; |  | ||||||
|     GLenum binary_format = 0; |  | ||||||
|     std::vector<u8> binary; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ShaderDiskCacheOpenGL { |  | ||||||
| public: |  | ||||||
|     explicit ShaderDiskCacheOpenGL(); |  | ||||||
|     ~ShaderDiskCacheOpenGL(); |  | ||||||
| 
 |  | ||||||
|     /// Binds a title ID for all future operations.
 |  | ||||||
|     void BindTitleID(u64 title_id); |  | ||||||
| 
 |  | ||||||
|     /// Loads the transferable cache. If the file has an old version or loading fails, it deletes the file.
 |  | ||||||
|     std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); |  | ||||||
| 
 |  | ||||||
|     /// Loads current game's precompiled cache. Invalidates on failure.
 |  | ||||||
|     std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); |  | ||||||
| 
 |  | ||||||
|     /// Removes the transferable (and precompiled) cache file.
 |  | ||||||
|     void InvalidateTransferable(); |  | ||||||
| 
 |  | ||||||
|     /// Removes the precompiled cache file and clears virtual precompiled cache file.
 |  | ||||||
|     void InvalidatePrecompiled(); |  | ||||||
| 
 |  | ||||||
|     /// Saves a raw dump to the transferable file. Checks for collisions.
 |  | ||||||
|     void SaveEntry(const ShaderDiskCacheEntry& entry); |  | ||||||
| 
 |  | ||||||
|     /// Saves a dump entry to the precompiled file. Does not check for collisions.
 |  | ||||||
|     void SavePrecompiled(u64 unique_identifier, GLuint program); |  | ||||||
| 
 |  | ||||||
|     /// Serializes virtual precompiled shader cache file to real file
 |  | ||||||
|     void SaveVirtualPrecompiledFile(); |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     /// Loads the precompiled cache from the given file. Returns an empty optional on failure.
 |  | ||||||
|     std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( |  | ||||||
|         Common::FS::IOFile& file); |  | ||||||
| 
 |  | ||||||
|     /// Opens the current game's transferable file and writes its header if the file doesn't exist yet
 |  | ||||||
|     Common::FS::IOFile AppendTransferableFile() const; |  | ||||||
| 
 |  | ||||||
|     /// Saves the precompiled header to precompiled_cache_virtual_file
 |  | ||||||
|     void SavePrecompiledHeaderToVirtualPrecompiledCache(); |  | ||||||
| 
 |  | ||||||
|     /// Create shader disk cache directories. Returns true on success.
 |  | ||||||
|     bool EnsureDirectories() const; |  | ||||||
| 
 |  | ||||||
|     /// Gets current game's transferable file path
 |  | ||||||
|     std::filesystem::path GetTransferablePath() const; |  | ||||||
| 
 |  | ||||||
|     /// Gets current game's precompiled file path
 |  | ||||||
|     std::filesystem::path GetPrecompiledPath() const; |  | ||||||
| 
 |  | ||||||
|     /// Get user's transferable directory path
 |  | ||||||
|     std::filesystem::path GetTransferableDir() const; |  | ||||||
| 
 |  | ||||||
|     /// Get user's precompiled directory path
 |  | ||||||
|     std::filesystem::path GetPrecompiledDir() const; |  | ||||||
| 
 |  | ||||||
|     /// Get user's shader directory path
 |  | ||||||
|     std::filesystem::path GetBaseDir() const; |  | ||||||
| 
 |  | ||||||
|     /// Get current game's title id
 |  | ||||||
|     std::string GetTitleID() const; |  | ||||||
| 
 |  | ||||||
|     template <typename T> |  | ||||||
|     bool SaveArrayToPrecompiled(const T* data, std::size_t length) { |  | ||||||
|         const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( |  | ||||||
|             data, length, precompiled_cache_virtual_file_offset); |  | ||||||
|         precompiled_cache_virtual_file_offset += write_length; |  | ||||||
|         return write_length == sizeof(T) * length; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     template <typename T> |  | ||||||
|     bool LoadArrayFromPrecompiled(T* data, std::size_t length) { |  | ||||||
|         const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( |  | ||||||
|             data, length, precompiled_cache_virtual_file_offset); |  | ||||||
|         precompiled_cache_virtual_file_offset += read_length; |  | ||||||
|         return read_length == sizeof(T) * length; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     template <typename T> |  | ||||||
|     bool SaveObjectToPrecompiled(const T& object) { |  | ||||||
|         return SaveArrayToPrecompiled(&object, 1); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool SaveObjectToPrecompiled(bool object) { |  | ||||||
|         const auto value = static_cast<u8>(object); |  | ||||||
|         return SaveArrayToPrecompiled(&value, 1); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     template <typename T> |  | ||||||
|     bool LoadObjectFromPrecompiled(T& object) { |  | ||||||
|         return LoadArrayFromPrecompiled(&object, 1); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Stores whole precompiled cache which will be read from or saved to the precompiled cache
 |  | ||||||
|     // file
 |  | ||||||
|     FileSys::VectorVfsFile precompiled_cache_virtual_file; |  | ||||||
|     // Stores the current offset of the precompiled cache file for IO purposes
 |  | ||||||
|     std::size_t precompiled_cache_virtual_file_offset = 0; |  | ||||||
| 
 |  | ||||||
|     // Stored transferable shaders
 |  | ||||||
|     std::unordered_set<u64> stored_transferable; |  | ||||||
| 
 |  | ||||||
|     /// Title ID to operate on
 |  | ||||||
|     u64 title_id = 0; |  | ||||||
| 
 |  | ||||||
|     // The cache has been loaded at boot
 |  | ||||||
|     bool is_usable = false; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace OpenGL
 |  | ||||||
|  | @ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi | ||||||
|     cmdbuf.SetScissor(0, scissor); |     cmdbuf.SetScissor(0, scissor); | ||||||
|     cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); |     cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | ||||||
| } | } | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | ||||||
|  |  | ||||||
|  | @ -8,146 +8,14 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| 
 | 
 | ||||||
| VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | ComputePipeline::ComputePipeline() = default; | ||||||
|                                      VKDescriptorPool& descriptor_pool_, |  | ||||||
|                                      VKUpdateDescriptorQueue& update_descriptor_queue_, |  | ||||||
|                                      const SPIRVShader& shader_) |  | ||||||
|     : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, |  | ||||||
|       descriptor_set_layout{CreateDescriptorSetLayout()}, |  | ||||||
|       descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, |  | ||||||
|       update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, |  | ||||||
|       descriptor_template{CreateDescriptorUpdateTemplate()}, |  | ||||||
|       shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} |  | ||||||
| 
 | 
 | ||||||
| VKComputePipeline::~VKComputePipeline() = default; | ComputePipeline::~ComputePipeline() = default; | ||||||
| 
 |  | ||||||
| VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { |  | ||||||
|     if (!descriptor_template) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     const VkDescriptorSet set = descriptor_allocator.Commit(); |  | ||||||
|     update_descriptor_queue.Send(*descriptor_template, set); |  | ||||||
|     return set; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { |  | ||||||
|     std::vector<VkDescriptorSetLayoutBinding> bindings; |  | ||||||
|     u32 binding = 0; |  | ||||||
|     const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { |  | ||||||
|         // TODO(Rodrigo): Maybe make individual bindings here?
 |  | ||||||
|         for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { |  | ||||||
|             bindings.push_back({ |  | ||||||
|                 .binding = binding++, |  | ||||||
|                 .descriptorType = descriptor_type, |  | ||||||
|                 .descriptorCount = 1, |  | ||||||
|                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |  | ||||||
|                 .pImmutableSamplers = nullptr, |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); |  | ||||||
|     add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); |  | ||||||
| 
 |  | ||||||
|     return device.GetLogical().CreateDescriptorSetLayout({ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .bindingCount = static_cast<u32>(bindings.size()), |  | ||||||
|         .pBindings = bindings.data(), |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { |  | ||||||
|     return device.GetLogical().CreatePipelineLayout({ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .setLayoutCount = 1, |  | ||||||
|         .pSetLayouts = descriptor_set_layout.address(), |  | ||||||
|         .pushConstantRangeCount = 0, |  | ||||||
|         .pPushConstantRanges = nullptr, |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { |  | ||||||
|     std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; |  | ||||||
|     u32 binding = 0; |  | ||||||
|     u32 offset = 0; |  | ||||||
|     FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); |  | ||||||
|     if (template_entries.empty()) { |  | ||||||
|         // If the shader doesn't use descriptor sets, skip template creation.
 |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), |  | ||||||
|         .pDescriptorUpdateEntries = template_entries.data(), |  | ||||||
|         .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, |  | ||||||
|         .descriptorSetLayout = *descriptor_set_layout, |  | ||||||
|         .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, |  | ||||||
|         .pipelineLayout = *layout, |  | ||||||
|         .set = DESCRIPTOR_SET, |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { |  | ||||||
|     device.SaveShader(code); |  | ||||||
| 
 |  | ||||||
|     return device.GetLogical().CreateShaderModule({ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .codeSize = code.size() * sizeof(u32), |  | ||||||
|         .pCode = code.data(), |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::Pipeline VKComputePipeline::CreatePipeline() const { |  | ||||||
| 
 |  | ||||||
|     VkComputePipelineCreateInfo ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .stage = |  | ||||||
|             { |  | ||||||
|                 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |  | ||||||
|                 .pNext = nullptr, |  | ||||||
|                 .flags = 0, |  | ||||||
|                 .stage = VK_SHADER_STAGE_COMPUTE_BIT, |  | ||||||
|                 .module = *shader_module, |  | ||||||
|                 .pName = "main", |  | ||||||
|                 .pSpecializationInfo = nullptr, |  | ||||||
|             }, |  | ||||||
|         .layout = *layout, |  | ||||||
|         .basePipelineHandle = nullptr, |  | ||||||
|         .basePipelineIndex = 0, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .requiredSubgroupSize = GuestWarpSize, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { |  | ||||||
|         ci.stage.pNext = &subgroup_size_ci; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return device.GetLogical().CreateComputePipeline(ci); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| } // namespace Vulkan
 | } // namespace Vulkan
 | ||||||
|  |  | ||||||
|  | @ -6,7 +6,6 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  | @ -15,50 +14,10 @@ class Device; | ||||||
| class VKScheduler; | class VKScheduler; | ||||||
| class VKUpdateDescriptorQueue; | class VKUpdateDescriptorQueue; | ||||||
| 
 | 
 | ||||||
| class VKComputePipeline final { | class ComputePipeline { | ||||||
| public: | public: | ||||||
|     explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, |     explicit ComputePipeline(); | ||||||
|                                VKDescriptorPool& descriptor_pool_, |     ~ComputePipeline(); | ||||||
|                                VKUpdateDescriptorQueue& update_descriptor_queue_, |  | ||||||
|                                const SPIRVShader& shader_); |  | ||||||
|     ~VKComputePipeline(); |  | ||||||
| 
 |  | ||||||
|     VkDescriptorSet CommitDescriptorSet(); |  | ||||||
| 
 |  | ||||||
|     VkPipeline GetHandle() const { |  | ||||||
|         return *pipeline; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VkPipelineLayout GetLayout() const { |  | ||||||
|         return *layout; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ShaderEntries& GetEntries() const { |  | ||||||
|         return entries; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     vk::DescriptorSetLayout CreateDescriptorSetLayout() const; |  | ||||||
| 
 |  | ||||||
|     vk::PipelineLayout CreatePipelineLayout() const; |  | ||||||
| 
 |  | ||||||
|     vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; |  | ||||||
| 
 |  | ||||||
|     vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; |  | ||||||
| 
 |  | ||||||
|     vk::Pipeline CreatePipeline() const; |  | ||||||
| 
 |  | ||||||
|     const Device& device; |  | ||||||
|     VKScheduler& scheduler; |  | ||||||
|     ShaderEntries entries; |  | ||||||
| 
 |  | ||||||
|     vk::DescriptorSetLayout descriptor_set_layout; |  | ||||||
|     DescriptorAllocator descriptor_allocator; |  | ||||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; |  | ||||||
|     vk::PipelineLayout layout; |  | ||||||
|     vk::DescriptorUpdateTemplateKHR descriptor_template; |  | ||||||
|     vk::ShaderModule shader_module; |  | ||||||
|     vk::Pipeline pipeline; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Vulkan
 | } // namespace Vulkan
 | ||||||
|  |  | ||||||
|  | @ -1,484 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <array> |  | ||||||
| #include <cstring> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/microprofile.h" |  | ||||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |  | ||||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" |  | ||||||
| 
 |  | ||||||
| namespace Vulkan { |  | ||||||
| 
 |  | ||||||
| MICROPROFILE_DECLARE(Vulkan_PipelineCache); |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| template <class StencilFace> |  | ||||||
| VkStencilOpState GetStencilFaceState(const StencilFace& face) { |  | ||||||
|     return { |  | ||||||
|         .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), |  | ||||||
|         .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), |  | ||||||
|         .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), |  | ||||||
|         .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), |  | ||||||
|         .compareMask = 0, |  | ||||||
|         .writeMask = 0, |  | ||||||
|         .reference = 0, |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { |  | ||||||
|     static constexpr std::array unsupported_topologies = { |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_POINT_LIST, |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_LINE_LIST, |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, |  | ||||||
|         VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; |  | ||||||
|     return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), |  | ||||||
|                      topology) == std::end(unsupported_topologies); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { |  | ||||||
|     union Swizzle { |  | ||||||
|         u32 raw; |  | ||||||
|         BitField<0, 3, Maxwell::ViewportSwizzle> x; |  | ||||||
|         BitField<4, 3, Maxwell::ViewportSwizzle> y; |  | ||||||
|         BitField<8, 3, Maxwell::ViewportSwizzle> z; |  | ||||||
|         BitField<12, 3, Maxwell::ViewportSwizzle> w; |  | ||||||
|     }; |  | ||||||
|     const Swizzle unpacked{swizzle}; |  | ||||||
| 
 |  | ||||||
|     return { |  | ||||||
|         .x = MaxwellToVK::ViewportSwizzle(unpacked.x), |  | ||||||
|         .y = MaxwellToVK::ViewportSwizzle(unpacked.y), |  | ||||||
|         .z = MaxwellToVK::ViewportSwizzle(unpacked.z), |  | ||||||
|         .w = MaxwellToVK::ViewportSwizzle(unpacked.w), |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { |  | ||||||
|     switch (msaa_mode) { |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa1x1: |  | ||||||
|         return VK_SAMPLE_COUNT_1_BIT; |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa2x1: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa2x1_D3D: |  | ||||||
|         return VK_SAMPLE_COUNT_2_BIT; |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa2x2: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa2x2_VC4: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa2x2_VC12: |  | ||||||
|         return VK_SAMPLE_COUNT_4_BIT; |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa4x2: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa4x2_D3D: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa4x2_VC8: |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa4x2_VC24: |  | ||||||
|         return VK_SAMPLE_COUNT_8_BIT; |  | ||||||
|     case Tegra::Texture::MsaaMode::Msaa4x4: |  | ||||||
|         return VK_SAMPLE_COUNT_16_BIT; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); |  | ||||||
|         return VK_SAMPLE_COUNT_1_BIT; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, |  | ||||||
|                                        VKDescriptorPool& descriptor_pool_, |  | ||||||
|                                        VKUpdateDescriptorQueue& update_descriptor_queue_, |  | ||||||
|                                        const GraphicsPipelineCacheKey& key, |  | ||||||
|                                        vk::Span<VkDescriptorSetLayoutBinding> bindings, |  | ||||||
|                                        const SPIRVProgram& program, u32 num_color_buffers) |  | ||||||
|     : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, |  | ||||||
|       descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, |  | ||||||
|       descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, |  | ||||||
|       update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, |  | ||||||
|       descriptor_template{CreateDescriptorUpdateTemplate(program)}, |  | ||||||
|       modules(CreateShaderModules(program)), |  | ||||||
|       pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} |  | ||||||
| 
 |  | ||||||
| VKGraphicsPipeline::~VKGraphicsPipeline() = default; |  | ||||||
| 
 |  | ||||||
| VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { |  | ||||||
|     if (!descriptor_template) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     const VkDescriptorSet set = descriptor_allocator.Commit(); |  | ||||||
|     update_descriptor_queue.Send(*descriptor_template, set); |  | ||||||
|     return set; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( |  | ||||||
|     vk::Span<VkDescriptorSetLayoutBinding> bindings) const { |  | ||||||
|     const VkDescriptorSetLayoutCreateInfo ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .bindingCount = bindings.size(), |  | ||||||
|         .pBindings = bindings.data(), |  | ||||||
|     }; |  | ||||||
|     return device.GetLogical().CreateDescriptorSetLayout(ci); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { |  | ||||||
|     const VkPipelineLayoutCreateInfo ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .setLayoutCount = 1, |  | ||||||
|         .pSetLayouts = descriptor_set_layout.address(), |  | ||||||
|         .pushConstantRangeCount = 0, |  | ||||||
|         .pPushConstantRanges = nullptr, |  | ||||||
|     }; |  | ||||||
|     return device.GetLogical().CreatePipelineLayout(ci); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( |  | ||||||
|     const SPIRVProgram& program) const { |  | ||||||
|     std::vector<VkDescriptorUpdateTemplateEntry> template_entries; |  | ||||||
|     u32 binding = 0; |  | ||||||
|     u32 offset = 0; |  | ||||||
|     for (const auto& stage : program) { |  | ||||||
|         if (stage) { |  | ||||||
|             FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (template_entries.empty()) { |  | ||||||
|         // If the shader doesn't use descriptor sets, skip template creation.
 |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VkDescriptorUpdateTemplateCreateInfoKHR ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), |  | ||||||
|         .pDescriptorUpdateEntries = template_entries.data(), |  | ||||||
|         .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, |  | ||||||
|         .descriptorSetLayout = *descriptor_set_layout, |  | ||||||
|         .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, |  | ||||||
|         .pipelineLayout = *layout, |  | ||||||
|         .set = DESCRIPTOR_SET, |  | ||||||
|     }; |  | ||||||
|     return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( |  | ||||||
|     const SPIRVProgram& program) const { |  | ||||||
|     VkShaderModuleCreateInfo ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .codeSize = 0, |  | ||||||
|         .pCode = nullptr, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::vector<vk::ShaderModule> shader_modules; |  | ||||||
|     shader_modules.reserve(Maxwell::MaxShaderStage); |  | ||||||
|     for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { |  | ||||||
|         const auto& stage = program[i]; |  | ||||||
|         if (!stage) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         device.SaveShader(stage->code); |  | ||||||
| 
 |  | ||||||
|         ci.codeSize = stage->code.size() * sizeof(u32); |  | ||||||
|         ci.pCode = stage->code.data(); |  | ||||||
|         shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); |  | ||||||
|     } |  | ||||||
|     return shader_modules; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, |  | ||||||
|                                                 VkRenderPass renderpass, |  | ||||||
|                                                 u32 num_color_buffers) const { |  | ||||||
|     const auto& state = cache_key.fixed_state; |  | ||||||
|     const auto& viewport_swizzles = state.viewport_swizzles; |  | ||||||
| 
 |  | ||||||
|     FixedPipelineState::DynamicState dynamic; |  | ||||||
|     if (device.IsExtExtendedDynamicStateSupported()) { |  | ||||||
|         // Insert dummy values, as long as they are valid they don't matter as extended dynamic
 |  | ||||||
|         // state is ignored
 |  | ||||||
|         dynamic.raw1 = 0; |  | ||||||
|         dynamic.raw2 = 0; |  | ||||||
|         dynamic.vertex_strides.fill(0); |  | ||||||
|     } else { |  | ||||||
|         dynamic = state.dynamic_state; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<VkVertexInputBindingDescription> vertex_bindings; |  | ||||||
|     std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; |  | ||||||
|     for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { |  | ||||||
|         const bool instanced = state.binding_divisors[index] != 0; |  | ||||||
|         const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; |  | ||||||
|         vertex_bindings.push_back({ |  | ||||||
|             .binding = static_cast<u32>(index), |  | ||||||
|             .stride = dynamic.vertex_strides[index], |  | ||||||
|             .inputRate = rate, |  | ||||||
|         }); |  | ||||||
|         if (instanced) { |  | ||||||
|             vertex_binding_divisors.push_back({ |  | ||||||
|                 .binding = static_cast<u32>(index), |  | ||||||
|                 .divisor = state.binding_divisors[index], |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<VkVertexInputAttributeDescription> vertex_attributes; |  | ||||||
|     const auto& input_attributes = program[0]->entries.attributes; |  | ||||||
|     for (std::size_t index = 0; index < state.attributes.size(); ++index) { |  | ||||||
|         const auto& attribute = state.attributes[index]; |  | ||||||
|         if (!attribute.enabled) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (!input_attributes.contains(static_cast<u32>(index))) { |  | ||||||
|             // Skip attributes not used by the vertex shaders.
 |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         vertex_attributes.push_back({ |  | ||||||
|             .location = static_cast<u32>(index), |  | ||||||
|             .binding = attribute.buffer, |  | ||||||
|             .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), |  | ||||||
|             .offset = attribute.offset, |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VkPipelineVertexInputStateCreateInfo vertex_input_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), |  | ||||||
|         .pVertexBindingDescriptions = vertex_bindings.data(), |  | ||||||
|         .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), |  | ||||||
|         .pVertexAttributeDescriptions = vertex_attributes.data(), |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), |  | ||||||
|         .pVertexBindingDivisors = vertex_binding_divisors.data(), |  | ||||||
|     }; |  | ||||||
|     if (!vertex_binding_divisors.empty()) { |  | ||||||
|         vertex_input_ci.pNext = &input_divisor_ci; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); |  | ||||||
|     const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), |  | ||||||
|         .primitiveRestartEnable = state.primitive_restart_enable != 0 && |  | ||||||
|                                   SupportsPrimitiveRestart(input_assembly_topology), |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineTessellationStateCreateInfo tessellation_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     VkPipelineViewportStateCreateInfo viewport_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .viewportCount = Maxwell::NumViewports, |  | ||||||
|         .pViewports = nullptr, |  | ||||||
|         .scissorCount = Maxwell::NumViewports, |  | ||||||
|         .pScissors = nullptr, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; |  | ||||||
|     std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); |  | ||||||
|     VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .viewportCount = Maxwell::NumViewports, |  | ||||||
|         .pViewportSwizzles = swizzles.data(), |  | ||||||
|     }; |  | ||||||
|     if (device.IsNvViewportSwizzleSupported()) { |  | ||||||
|         viewport_ci.pNext = &swizzle_ci; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VkPipelineRasterizationStateCreateInfo rasterization_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .depthClampEnable = |  | ||||||
|             static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), |  | ||||||
|         .rasterizerDiscardEnable = |  | ||||||
|             static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), |  | ||||||
|         .polygonMode = VK_POLYGON_MODE_FILL, |  | ||||||
|         .cullMode = static_cast<VkCullModeFlags>( |  | ||||||
|             dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), |  | ||||||
|         .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), |  | ||||||
|         .depthBiasEnable = state.depth_bias_enable, |  | ||||||
|         .depthBiasConstantFactor = 0.0f, |  | ||||||
|         .depthBiasClamp = 0.0f, |  | ||||||
|         .depthBiasSlopeFactor = 0.0f, |  | ||||||
|         .lineWidth = 1.0f, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineMultisampleStateCreateInfo multisample_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), |  | ||||||
|         .sampleShadingEnable = VK_FALSE, |  | ||||||
|         .minSampleShading = 0.0f, |  | ||||||
|         .pSampleMask = nullptr, |  | ||||||
|         .alphaToCoverageEnable = VK_FALSE, |  | ||||||
|         .alphaToOneEnable = VK_FALSE, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .depthTestEnable = dynamic.depth_test_enable, |  | ||||||
|         .depthWriteEnable = dynamic.depth_write_enable, |  | ||||||
|         .depthCompareOp = dynamic.depth_test_enable |  | ||||||
|                               ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) |  | ||||||
|                               : VK_COMPARE_OP_ALWAYS, |  | ||||||
|         .depthBoundsTestEnable = dynamic.depth_bounds_enable, |  | ||||||
|         .stencilTestEnable = dynamic.stencil_enable, |  | ||||||
|         .front = GetStencilFaceState(dynamic.front), |  | ||||||
|         .back = GetStencilFaceState(dynamic.back), |  | ||||||
|         .minDepthBounds = 0.0f, |  | ||||||
|         .maxDepthBounds = 0.0f, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |  | ||||||
|     for (std::size_t index = 0; index < num_color_buffers; ++index) { |  | ||||||
|         static constexpr std::array COMPONENT_TABLE{ |  | ||||||
|             VK_COLOR_COMPONENT_R_BIT, |  | ||||||
|             VK_COLOR_COMPONENT_G_BIT, |  | ||||||
|             VK_COLOR_COMPONENT_B_BIT, |  | ||||||
|             VK_COLOR_COMPONENT_A_BIT, |  | ||||||
|         }; |  | ||||||
|         const auto& blend = state.attachments[index]; |  | ||||||
| 
 |  | ||||||
|         VkColorComponentFlags color_components = 0; |  | ||||||
|         for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { |  | ||||||
|             if (blend.Mask()[i]) { |  | ||||||
|                 color_components |= COMPONENT_TABLE[i]; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         cb_attachments[index] = { |  | ||||||
|             .blendEnable = blend.enable != 0, |  | ||||||
|             .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), |  | ||||||
|             .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), |  | ||||||
|             .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), |  | ||||||
|             .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), |  | ||||||
|             .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), |  | ||||||
|             .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), |  | ||||||
|             .colorWriteMask = color_components, |  | ||||||
|         }; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VkPipelineColorBlendStateCreateInfo color_blend_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .logicOpEnable = VK_FALSE, |  | ||||||
|         .logicOp = VK_LOGIC_OP_COPY, |  | ||||||
|         .attachmentCount = num_color_buffers, |  | ||||||
|         .pAttachments = cb_attachments.data(), |  | ||||||
|         .blendConstants = {}, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::vector dynamic_states{ |  | ||||||
|         VK_DYNAMIC_STATE_VIEWPORT,           VK_DYNAMIC_STATE_SCISSOR, |  | ||||||
|         VK_DYNAMIC_STATE_DEPTH_BIAS,         VK_DYNAMIC_STATE_BLEND_CONSTANTS, |  | ||||||
|         VK_DYNAMIC_STATE_DEPTH_BOUNDS,       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, |  | ||||||
|         VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, |  | ||||||
|     }; |  | ||||||
|     if (device.IsExtExtendedDynamicStateSupported()) { |  | ||||||
|         static constexpr std::array extended{ |  | ||||||
|             VK_DYNAMIC_STATE_CULL_MODE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_FRONT_FACE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, |  | ||||||
|             VK_DYNAMIC_STATE_STENCIL_OP_EXT, |  | ||||||
|         }; |  | ||||||
|         dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .dynamicStateCount = static_cast<u32>(dynamic_states.size()), |  | ||||||
|         .pDynamicStates = dynamic_states.data(), |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .requiredSubgroupSize = GuestWarpSize, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::vector<VkPipelineShaderStageCreateInfo> shader_stages; |  | ||||||
|     std::size_t module_index = 0; |  | ||||||
|     for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |  | ||||||
|         if (!program[stage]) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); |  | ||||||
|         stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |  | ||||||
|         stage_ci.pNext = nullptr; |  | ||||||
|         stage_ci.flags = 0; |  | ||||||
|         stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); |  | ||||||
|         stage_ci.module = *modules[module_index++]; |  | ||||||
|         stage_ci.pName = "main"; |  | ||||||
|         stage_ci.pSpecializationInfo = nullptr; |  | ||||||
| 
 |  | ||||||
|         if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { |  | ||||||
|             stage_ci.pNext = &subgroup_size_ci; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .stageCount = static_cast<u32>(shader_stages.size()), |  | ||||||
|         .pStages = shader_stages.data(), |  | ||||||
|         .pVertexInputState = &vertex_input_ci, |  | ||||||
|         .pInputAssemblyState = &input_assembly_ci, |  | ||||||
|         .pTessellationState = &tessellation_ci, |  | ||||||
|         .pViewportState = &viewport_ci, |  | ||||||
|         .pRasterizationState = &rasterization_ci, |  | ||||||
|         .pMultisampleState = &multisample_ci, |  | ||||||
|         .pDepthStencilState = &depth_stencil_ci, |  | ||||||
|         .pColorBlendState = &color_blend_ci, |  | ||||||
|         .pDynamicState = &dynamic_state_ci, |  | ||||||
|         .layout = *layout, |  | ||||||
|         .renderPass = renderpass, |  | ||||||
|         .subpass = 0, |  | ||||||
|         .basePipelineHandle = nullptr, |  | ||||||
|         .basePipelineIndex = 0, |  | ||||||
|     }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace Vulkan
 |  | ||||||
|  | @ -1,103 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <optional> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" |  | ||||||
| 
 |  | ||||||
| namespace Vulkan { |  | ||||||
| 
 |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; |  | ||||||
| 
 |  | ||||||
| struct GraphicsPipelineCacheKey { |  | ||||||
|     VkRenderPass renderpass; |  | ||||||
|     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; |  | ||||||
|     FixedPipelineState fixed_state; |  | ||||||
| 
 |  | ||||||
|     std::size_t Hash() const noexcept; |  | ||||||
| 
 |  | ||||||
|     bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { |  | ||||||
|         return !operator==(rhs); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::size_t Size() const noexcept { |  | ||||||
|         return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |  | ||||||
| static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); |  | ||||||
| static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); |  | ||||||
| 
 |  | ||||||
| class Device; |  | ||||||
| class VKDescriptorPool; |  | ||||||
| class VKScheduler; |  | ||||||
| class VKUpdateDescriptorQueue; |  | ||||||
| 
 |  | ||||||
| using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; |  | ||||||
| 
 |  | ||||||
| class VKGraphicsPipeline final { |  | ||||||
| public: |  | ||||||
|     explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, |  | ||||||
|                                 VKDescriptorPool& descriptor_pool, |  | ||||||
|                                 VKUpdateDescriptorQueue& update_descriptor_queue_, |  | ||||||
|                                 const GraphicsPipelineCacheKey& key, |  | ||||||
|                                 vk::Span<VkDescriptorSetLayoutBinding> bindings, |  | ||||||
|                                 const SPIRVProgram& program, u32 num_color_buffers); |  | ||||||
|     ~VKGraphicsPipeline(); |  | ||||||
| 
 |  | ||||||
|     VkDescriptorSet CommitDescriptorSet(); |  | ||||||
| 
 |  | ||||||
|     VkPipeline GetHandle() const { |  | ||||||
|         return *pipeline; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VkPipelineLayout GetLayout() const { |  | ||||||
|         return *layout; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     GraphicsPipelineCacheKey GetCacheKey() const { |  | ||||||
|         return cache_key; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     vk::DescriptorSetLayout CreateDescriptorSetLayout( |  | ||||||
|         vk::Span<VkDescriptorSetLayoutBinding> bindings) const; |  | ||||||
| 
 |  | ||||||
|     vk::PipelineLayout CreatePipelineLayout() const; |  | ||||||
| 
 |  | ||||||
|     vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( |  | ||||||
|         const SPIRVProgram& program) const; |  | ||||||
| 
 |  | ||||||
|     std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; |  | ||||||
| 
 |  | ||||||
|     vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, |  | ||||||
|                                 u32 num_color_buffers) const; |  | ||||||
| 
 |  | ||||||
|     const Device& device; |  | ||||||
|     VKScheduler& scheduler; |  | ||||||
|     const GraphicsPipelineCacheKey cache_key; |  | ||||||
|     const u64 hash; |  | ||||||
| 
 |  | ||||||
|     vk::DescriptorSetLayout descriptor_set_layout; |  | ||||||
|     DescriptorAllocator descriptor_allocator; |  | ||||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; |  | ||||||
|     vk::PipelineLayout layout; |  | ||||||
|     vk::DescriptorUpdateTemplateKHR descriptor_template; |  | ||||||
|     std::vector<vk::ShaderModule> modules; |  | ||||||
| 
 |  | ||||||
|     vk::Pipeline pipeline; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace Vulkan
 |  | ||||||
|  | @ -19,49 +19,27 @@ | ||||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/shader/compiler_settings.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
| #include "video_core/shader_notify.h" | #include "video_core/shader_notify.h" | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| 
 |  | ||||||
| MICROPROFILE_DECLARE(Vulkan_PipelineCache); | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||||||
| 
 | 
 | ||||||
| using Tegra::Engines::ShaderType; | using Tegra::Engines::ShaderType; | ||||||
| using VideoCommon::Shader::GetShaderAddress; |  | ||||||
| using VideoCommon::Shader::GetShaderCode; |  | ||||||
| using VideoCommon::Shader::KERNEL_MAIN_OFFSET; |  | ||||||
| using VideoCommon::Shader::ProgramCode; |  | ||||||
| using VideoCommon::Shader::STAGE_MAIN_OFFSET; |  | ||||||
| 
 | 
 | ||||||
| namespace { | namespace { | ||||||
| 
 | size_t StageFromProgram(size_t program) { | ||||||
| constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; |  | ||||||
| constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; |  | ||||||
| constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; |  | ||||||
| constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; |  | ||||||
| constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; |  | ||||||
| constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |  | ||||||
| 
 |  | ||||||
| constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |  | ||||||
|     .depth = VideoCommon::Shader::CompileDepth::FullDecompile, |  | ||||||
|     .disable_else_derivation = true, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| constexpr std::size_t GetStageFromProgram(std::size_t program) { |  | ||||||
|     return program == 0 ? 0 : program - 1; |     return program == 0 ? 0 : program - 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | ShaderType StageFromProgram(Maxwell::ShaderProgram program) { | ||||||
|     return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); |     return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ShaderType GetShaderType(Maxwell::ShaderProgram program) { | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | ||||||
|  | @ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { | ||||||
|         return ShaderType::Vertex; |         return ShaderType::Vertex; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 |  | ||||||
| template <VkDescriptorType descriptor_type, class Container> |  | ||||||
| void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, |  | ||||||
|                  VkShaderStageFlags stage_flags, const Container& container) { |  | ||||||
|     const u32 num_entries = static_cast<u32>(std::size(container)); |  | ||||||
|     for (std::size_t i = 0; i < num_entries; ++i) { |  | ||||||
|         u32 count = 1; |  | ||||||
|         if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { |  | ||||||
|             // Combined image samplers can be arrayed.
 |  | ||||||
|             count = container[i].size; |  | ||||||
|         } |  | ||||||
|         bindings.push_back({ |  | ||||||
|             .binding = binding++, |  | ||||||
|             .descriptorType = descriptor_type, |  | ||||||
|             .descriptorCount = count, |  | ||||||
|             .stageFlags = stage_flags, |  | ||||||
|             .pImmutableSamplers = nullptr, |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 FillDescriptorLayout(const ShaderEntries& entries, |  | ||||||
|                          std::vector<VkDescriptorSetLayoutBinding>& bindings, |  | ||||||
|                          Maxwell::ShaderProgram program_type, u32 base_binding) { |  | ||||||
|     const ShaderType stage = GetStageFromProgram(program_type); |  | ||||||
|     const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); |  | ||||||
| 
 |  | ||||||
|     u32 binding = base_binding; |  | ||||||
|     AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); |  | ||||||
|     AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); |  | ||||||
|     AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); |  | ||||||
|     AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); |  | ||||||
|     AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); |  | ||||||
|     AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); |  | ||||||
|     return binding; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { | size_t ComputePipelineCacheKey::Hash() const noexcept { | ||||||
|     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); |  | ||||||
|     return static_cast<std::size_t>(hash); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { |  | ||||||
|     return std::memcmp(&rhs, this, Size()) == 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::size_t ComputePipelineCacheKey::Hash() const noexcept { |  | ||||||
|     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | ||||||
|     return static_cast<std::size_t>(hash); |     return static_cast<size_t>(hash); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | ||||||
|     return std::memcmp(&rhs, this, sizeof *this) == 0; |     return std::memcmp(&rhs, this, sizeof *this) == 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, | Shader::Shader() = default; | ||||||
|                GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) |  | ||||||
|     : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), |  | ||||||
|       shader_ir(program_code, main_offset_, compiler_settings, registry), |  | ||||||
|       entries(GenerateShaderEntries(shader_ir)) {} |  | ||||||
| 
 | 
 | ||||||
| Shader::~Shader() = default; | Shader::~Shader() = default; | ||||||
| 
 | 
 | ||||||
| VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | ||||||
|                                  Tegra::Engines::Maxwell3D& maxwell3d_, |                              Tegra::Engines::Maxwell3D& maxwell3d_, | ||||||
|                                  Tegra::Engines::KeplerCompute& kepler_compute_, |                              Tegra::Engines::KeplerCompute& kepler_compute_, | ||||||
|                                  Tegra::MemoryManager& gpu_memory_, const Device& device_, |                              Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||||||
|                                  VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |                              VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | ||||||
|                                  VKUpdateDescriptorQueue& update_descriptor_queue_) |                              VKUpdateDescriptorQueue& update_descriptor_queue_) | ||||||
|     : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |     : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | ||||||
|       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | ||||||
|       scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ |       scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | ||||||
|                                                                     update_descriptor_queue_} {} |                                                                     update_descriptor_queue_} {} | ||||||
| 
 | 
 | ||||||
| VKPipelineCache::~VKPipelineCache() = default; | PipelineCache::~PipelineCache() = default; | ||||||
| 
 | 
 | ||||||
| std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | ||||||
|     std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; |  | ||||||
| 
 |  | ||||||
|     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |  | ||||||
|         const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |  | ||||||
| 
 |  | ||||||
|         // Skip stages that are not enabled
 |  | ||||||
|         if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; |  | ||||||
|         const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |  | ||||||
|         ASSERT(cpu_addr); |  | ||||||
| 
 |  | ||||||
|         Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |  | ||||||
|         if (!result) { |  | ||||||
|             const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; |  | ||||||
| 
 |  | ||||||
|             // No shader found - create a new one
 |  | ||||||
|             static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; |  | ||||||
|             const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); |  | ||||||
|             ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); |  | ||||||
|             const std::size_t size_in_bytes = code.size() * sizeof(u64); |  | ||||||
| 
 |  | ||||||
|             auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, |  | ||||||
|                                                    std::move(code), stage_offset); |  | ||||||
|             result = shader.get(); |  | ||||||
| 
 |  | ||||||
|             if (cpu_addr) { |  | ||||||
|                 Register(std::move(shader), *cpu_addr, size_in_bytes); |  | ||||||
|             } else { |  | ||||||
|                 null_shader = std::move(shader); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         shaders[index] = result; |  | ||||||
|     } |  | ||||||
|     return last_shaders = shaders; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( |  | ||||||
|     const GraphicsPipelineCacheKey& key, u32 num_color_buffers, |  | ||||||
|     VideoCommon::Shader::AsyncShaders& async_shaders) { |  | ||||||
|     MICROPROFILE_SCOPE(Vulkan_PipelineCache); |  | ||||||
| 
 |  | ||||||
|     if (last_graphics_pipeline && last_graphics_key == key) { |  | ||||||
|         return last_graphics_pipeline; |  | ||||||
|     } |  | ||||||
|     last_graphics_key = key; |  | ||||||
| 
 |  | ||||||
|     if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { |  | ||||||
|         std::unique_lock lock{pipeline_cache}; |  | ||||||
|         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); |  | ||||||
|         if (is_cache_miss) { |  | ||||||
|             gpu.ShaderNotify().MarkSharderBuilding(); |  | ||||||
|             LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |  | ||||||
|             const auto [program, bindings] = DecompileShaders(key.fixed_state); |  | ||||||
|             async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, |  | ||||||
|                                             update_descriptor_queue, bindings, program, key, |  | ||||||
|                                             num_color_buffers); |  | ||||||
|         } |  | ||||||
|         last_graphics_pipeline = pair->second.get(); |  | ||||||
|         return last_graphics_pipeline; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); |  | ||||||
|     auto& entry = pair->second; |  | ||||||
|     if (is_cache_miss) { |  | ||||||
|         gpu.ShaderNotify().MarkSharderBuilding(); |  | ||||||
|         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |  | ||||||
|         const auto [program, bindings] = DecompileShaders(key.fixed_state); |  | ||||||
|         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, |  | ||||||
|                                                      update_descriptor_queue, key, bindings, |  | ||||||
|                                                      program, num_color_buffers); |  | ||||||
|         gpu.ShaderNotify().MarkShaderComplete(); |  | ||||||
|     } |  | ||||||
|     last_graphics_pipeline = entry.get(); |  | ||||||
|     return last_graphics_pipeline; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { |  | ||||||
|     MICROPROFILE_SCOPE(Vulkan_PipelineCache); |     MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||||||
| 
 | 
 | ||||||
|     const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); |     const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | ||||||
|  | @ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | ||||||
|         return *entry; |         return *entry; | ||||||
|     } |     } | ||||||
|     LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |     LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||||||
| 
 |     throw "Bad"; | ||||||
|     const GPUVAddr gpu_addr = key.shader; |  | ||||||
| 
 |  | ||||||
|     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |  | ||||||
|     ASSERT(cpu_addr); |  | ||||||
| 
 |  | ||||||
|     Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); |  | ||||||
|     if (!shader) { |  | ||||||
|         // No shader found - create a new one
 |  | ||||||
|         const auto host_ptr = gpu_memory.GetPointer(gpu_addr); |  | ||||||
| 
 |  | ||||||
|         ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); |  | ||||||
|         const std::size_t size_in_bytes = code.size() * sizeof(u64); |  | ||||||
| 
 |  | ||||||
|         auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, |  | ||||||
|                                                     *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); |  | ||||||
|         shader = shader_info.get(); |  | ||||||
| 
 |  | ||||||
|         if (cpu_addr) { |  | ||||||
|             Register(std::move(shader_info), *cpu_addr, size_in_bytes); |  | ||||||
|         } else { |  | ||||||
|             null_kernel = std::move(shader_info); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Specialization specialization{ |  | ||||||
|         .base_binding = 0, |  | ||||||
|         .workgroup_size = key.workgroup_size, |  | ||||||
|         .shared_memory_size = key.shared_memory_size, |  | ||||||
|         .point_size = std::nullopt, |  | ||||||
|         .enabled_attributes = {}, |  | ||||||
|         .attribute_types = {}, |  | ||||||
|         .ndc_minus_one_to_one = false, |  | ||||||
|     }; |  | ||||||
|     const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |  | ||||||
|                                              shader->GetRegistry(), specialization), |  | ||||||
|                                    shader->GetEntries()}; |  | ||||||
|     entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |  | ||||||
|                                                 update_descriptor_queue, spirv_shader); |  | ||||||
|     return *entry; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | void PipelineCache::OnShaderRemoval(Shader*) {} | ||||||
|     gpu.ShaderNotify().MarkShaderComplete(); |  | ||||||
|     std::unique_lock lock{pipeline_cache}; |  | ||||||
|     graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void VKPipelineCache::OnShaderRemoval(Shader* shader) { |  | ||||||
|     bool finished = false; |  | ||||||
|     const auto Finish = [&] { |  | ||||||
|         // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
 |  | ||||||
|         // flush.
 |  | ||||||
|         if (finished) { |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         finished = true; |  | ||||||
|         scheduler.Finish(); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const GPUVAddr invalidated_addr = shader->GetGpuAddr(); |  | ||||||
|     for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { |  | ||||||
|         auto& entry = it->first; |  | ||||||
|         if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == |  | ||||||
|             entry.shaders.end()) { |  | ||||||
|             ++it; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         Finish(); |  | ||||||
|         it = graphics_cache.erase(it); |  | ||||||
|     } |  | ||||||
|     for (auto it = compute_cache.begin(); it != compute_cache.end();) { |  | ||||||
|         auto& entry = it->first; |  | ||||||
|         if (entry.shader != invalidated_addr) { |  | ||||||
|             ++it; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         Finish(); |  | ||||||
|         it = compute_cache.erase(it); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> |  | ||||||
| VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { |  | ||||||
|     Specialization specialization; |  | ||||||
|     if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { |  | ||||||
|         float point_size; |  | ||||||
|         std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); |  | ||||||
|         specialization.point_size = point_size; |  | ||||||
|         ASSERT(point_size != 0.0f); |  | ||||||
|     } |  | ||||||
|     for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |  | ||||||
|         const auto& attribute = fixed_state.attributes[i]; |  | ||||||
|         specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; |  | ||||||
|         specialization.attribute_types[i] = attribute.Type(); |  | ||||||
|     } |  | ||||||
|     specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; |  | ||||||
|     specialization.early_fragment_tests = fixed_state.early_z; |  | ||||||
| 
 |  | ||||||
|     // Alpha test
 |  | ||||||
|     specialization.alpha_test_func = |  | ||||||
|         FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); |  | ||||||
|     specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); |  | ||||||
| 
 |  | ||||||
|     SPIRVProgram program; |  | ||||||
|     std::vector<VkDescriptorSetLayoutBinding> bindings; |  | ||||||
| 
 |  | ||||||
|     for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { |  | ||||||
|         const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); |  | ||||||
|         // Skip stages that are not enabled
 |  | ||||||
|         if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); |  | ||||||
|         const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |  | ||||||
|         Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |  | ||||||
| 
 |  | ||||||
|         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
 |  | ||||||
|         const ShaderType program_type = GetShaderType(program_enum); |  | ||||||
|         const auto& entries = shader->GetEntries(); |  | ||||||
|         program[stage] = { |  | ||||||
|             Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |  | ||||||
|             entries, |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         const u32 old_binding = specialization.base_binding; |  | ||||||
|         specialization.base_binding = |  | ||||||
|             FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); |  | ||||||
|         ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); |  | ||||||
|     } |  | ||||||
|     return {std::move(program), std::move(bindings)}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <VkDescriptorType descriptor_type, class Container> |  | ||||||
| void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, |  | ||||||
|               u32& offset, const Container& container) { |  | ||||||
|     static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); |  | ||||||
|     const u32 count = static_cast<u32>(std::size(container)); |  | ||||||
| 
 |  | ||||||
|     if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { |  | ||||||
|         for (u32 i = 0; i < count; ++i) { |  | ||||||
|             const u32 num_samplers = container[i].size; |  | ||||||
|             template_entries.push_back({ |  | ||||||
|                 .dstBinding = binding, |  | ||||||
|                 .dstArrayElement = 0, |  | ||||||
|                 .descriptorCount = num_samplers, |  | ||||||
|                 .descriptorType = descriptor_type, |  | ||||||
|                 .offset = offset, |  | ||||||
|                 .stride = entry_size, |  | ||||||
|             }); |  | ||||||
| 
 |  | ||||||
|             ++binding; |  | ||||||
|             offset += num_samplers * entry_size; |  | ||||||
|         } |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || |  | ||||||
|                   descriptor_type == STORAGE_TEXEL_BUFFER) { |  | ||||||
|         // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
 |  | ||||||
|         // Note: Fixed in driver Windows 443.24, Linux 440.66.15
 |  | ||||||
|         for (u32 i = 0; i < count; ++i) { |  | ||||||
|             template_entries.push_back({ |  | ||||||
|                 .dstBinding = binding + i, |  | ||||||
|                 .dstArrayElement = 0, |  | ||||||
|                 .descriptorCount = 1, |  | ||||||
|                 .descriptorType = descriptor_type, |  | ||||||
|                 .offset = static_cast<std::size_t>(offset + i * entry_size), |  | ||||||
|                 .stride = entry_size, |  | ||||||
|             }); |  | ||||||
|         } |  | ||||||
|     } else if (count > 0) { |  | ||||||
|         template_entries.push_back({ |  | ||||||
|             .dstBinding = binding, |  | ||||||
|             .dstArrayElement = 0, |  | ||||||
|             .descriptorCount = count, |  | ||||||
|             .descriptorType = descriptor_type, |  | ||||||
|             .offset = offset, |  | ||||||
|             .stride = entry_size, |  | ||||||
|         }); |  | ||||||
|     } |  | ||||||
|     offset += count * entry_size; |  | ||||||
|     binding += count; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void FillDescriptorUpdateTemplateEntries( |  | ||||||
|     const ShaderEntries& entries, u32& binding, u32& offset, |  | ||||||
|     std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { |  | ||||||
|     AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); |  | ||||||
|     AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); |  | ||||||
|     AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); |  | ||||||
|     AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); |  | ||||||
|     AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); |  | ||||||
|     AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| } // namespace Vulkan
 | } // namespace Vulkan
 | ||||||
|  |  | ||||||
|  | @ -15,15 +15,8 @@ | ||||||
| #include <boost/functional/hash.hpp> | #include <boost/functional/hash.hpp> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |  | ||||||
| #include "video_core/shader/async_shaders.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
|  | @ -35,7 +28,7 @@ namespace Vulkan { | ||||||
| 
 | 
 | ||||||
| class Device; | class Device; | ||||||
| class RasterizerVulkan; | class RasterizerVulkan; | ||||||
| class VKComputePipeline; | class ComputePipeline; | ||||||
| class VKDescriptorPool; | class VKDescriptorPool; | ||||||
| class VKScheduler; | class VKScheduler; | ||||||
| class VKUpdateDescriptorQueue; | class VKUpdateDescriptorQueue; | ||||||
|  | @ -47,7 +40,7 @@ struct ComputePipelineCacheKey { | ||||||
|     u32 shared_memory_size; |     u32 shared_memory_size; | ||||||
|     std::array<u32, 3> workgroup_size; |     std::array<u32, 3> workgroup_size; | ||||||
| 
 | 
 | ||||||
|     std::size_t Hash() const noexcept; |     size_t Hash() const noexcept; | ||||||
| 
 | 
 | ||||||
|     bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; |     bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; | ||||||
| 
 | 
 | ||||||
|  | @ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | ||||||
| 
 | 
 | ||||||
| namespace std { | namespace std { | ||||||
| 
 | 
 | ||||||
| template <> |  | ||||||
| struct hash<Vulkan::GraphicsPipelineCacheKey> { |  | ||||||
|     std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { |  | ||||||
|         return k.Hash(); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <> | template <> | ||||||
| struct hash<Vulkan::ComputePipelineCacheKey> { | struct hash<Vulkan::ComputePipelineCacheKey> { | ||||||
|     std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { |     size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | ||||||
|         return k.Hash(); |         return k.Hash(); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  | @ -83,66 +69,26 @@ namespace Vulkan { | ||||||
| 
 | 
 | ||||||
| class Shader { | class Shader { | ||||||
| public: | public: | ||||||
|     explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, |     explicit Shader(); | ||||||
|                     Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, |  | ||||||
|                     VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); |  | ||||||
|     ~Shader(); |     ~Shader(); | ||||||
| 
 |  | ||||||
|     GPUVAddr GetGpuAddr() const { |  | ||||||
|         return gpu_addr; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     VideoCommon::Shader::ShaderIR& GetIR() { |  | ||||||
|         return shader_ir; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VideoCommon::Shader::ShaderIR& GetIR() const { |  | ||||||
|         return shader_ir; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VideoCommon::Shader::Registry& GetRegistry() const { |  | ||||||
|         return registry; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ShaderEntries& GetEntries() const { |  | ||||||
|         return entries; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     GPUVAddr gpu_addr{}; |  | ||||||
|     VideoCommon::Shader::ProgramCode program_code; |  | ||||||
|     VideoCommon::Shader::Registry registry; |  | ||||||
|     VideoCommon::Shader::ShaderIR shader_ir; |  | ||||||
|     ShaderEntries entries; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | class PipelineCache final : public VideoCommon::ShaderCache<Shader> { | ||||||
| public: | public: | ||||||
|     explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, |     explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | ||||||
|                              Tegra::Engines::Maxwell3D& maxwell3d, |                            Tegra::Engines::Maxwell3D& maxwell3d, | ||||||
|                              Tegra::Engines::KeplerCompute& kepler_compute, |                            Tegra::Engines::KeplerCompute& kepler_compute, | ||||||
|                              Tegra::MemoryManager& gpu_memory, const Device& device, |                            Tegra::MemoryManager& gpu_memory, const Device& device, | ||||||
|                              VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |                            VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | ||||||
|                              VKUpdateDescriptorQueue& update_descriptor_queue); |                            VKUpdateDescriptorQueue& update_descriptor_queue); | ||||||
|     ~VKPipelineCache() override; |     ~PipelineCache() override; | ||||||
| 
 | 
 | ||||||
|     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |     ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | ||||||
| 
 |  | ||||||
|     VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, |  | ||||||
|                                             u32 num_color_buffers, |  | ||||||
|                                             VideoCommon::Shader::AsyncShaders& async_shaders); |  | ||||||
| 
 |  | ||||||
|     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |  | ||||||
| 
 |  | ||||||
|     void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); |  | ||||||
| 
 | 
 | ||||||
| protected: | protected: | ||||||
|     void OnShaderRemoval(Shader* shader) final; |     void OnShaderRemoval(Shader* shader) final; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( |  | ||||||
|         const FixedPipelineState& fixed_state); |  | ||||||
| 
 |  | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|     Tegra::Engines::Maxwell3D& maxwell3d; |     Tegra::Engines::Maxwell3D& maxwell3d; | ||||||
|     Tegra::Engines::KeplerCompute& kepler_compute; |     Tegra::Engines::KeplerCompute& kepler_compute; | ||||||
|  | @ -158,17 +104,8 @@ private: | ||||||
| 
 | 
 | ||||||
|     std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |     std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | ||||||
| 
 | 
 | ||||||
|     GraphicsPipelineCacheKey last_graphics_key; |  | ||||||
|     VKGraphicsPipeline* last_graphics_pipeline = nullptr; |  | ||||||
| 
 |  | ||||||
|     std::mutex pipeline_cache; |     std::mutex pipeline_cache; | ||||||
|     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> |     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; | ||||||
|         graphics_cache; |  | ||||||
|     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| void FillDescriptorUpdateTemplateEntries( |  | ||||||
|     const ShaderEntries& entries, u32& binding, u32& offset, |  | ||||||
|     std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); |  | ||||||
| 
 |  | ||||||
| } // namespace Vulkan
 | } // namespace Vulkan
 | ||||||
|  |  | ||||||
|  | @ -24,7 +24,6 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
|  | @ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | ||||||
|     return scissor; |     return scissor; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |  | ||||||
|     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |  | ||||||
|     std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; |  | ||||||
|     for (size_t i = 0; i < std::size(addresses); ++i) { |  | ||||||
|         addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; |  | ||||||
|     } |  | ||||||
|     return addresses; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| struct TextureHandle { | struct TextureHandle { | ||||||
|     constexpr TextureHandle(u32 data, bool via_header_index) { |     constexpr TextureHandle(u32 data, bool via_header_index) { | ||||||
|         const Tegra::Texture::TextureHandle handle{data}; |         const Tegra::Texture::TextureHandle handle{data}; | ||||||
|  | @ -117,98 +107,6 @@ struct TextureHandle { | ||||||
|     u32 sampler; |     u32 sampler; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <typename Engine, typename Entry> |  | ||||||
| TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, |  | ||||||
|                              size_t stage, size_t index = 0) { |  | ||||||
|     const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); |  | ||||||
|     if constexpr (std::is_same_v<Entry, SamplerEntry>) { |  | ||||||
|         if (entry.is_separated) { |  | ||||||
|             const u32 buffer_1 = entry.buffer; |  | ||||||
|             const u32 buffer_2 = entry.secondary_buffer; |  | ||||||
|             const u32 offset_1 = entry.offset; |  | ||||||
|             const u32 offset_2 = entry.secondary_offset; |  | ||||||
|             const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); |  | ||||||
|             const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); |  | ||||||
|             return TextureHandle(handle_1 | handle_2, via_header_index); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (entry.is_bindless) { |  | ||||||
|         const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |  | ||||||
|         return TextureHandle(raw, via_header_index); |  | ||||||
|     } |  | ||||||
|     const u32 buffer = engine.GetBoundBuffer(); |  | ||||||
|     const u64 offset = (entry.offset + index) * sizeof(u32); |  | ||||||
|     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |  | ||||||
|     if (entry.is_buffer) { |  | ||||||
|         return ImageViewType::e2D; |  | ||||||
|     } |  | ||||||
|     switch (entry.type) { |  | ||||||
|     case Tegra::Shader::TextureType::Texture1D: |  | ||||||
|         return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; |  | ||||||
|     case Tegra::Shader::TextureType::Texture2D: |  | ||||||
|         return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; |  | ||||||
|     case Tegra::Shader::TextureType::Texture3D: |  | ||||||
|         return ImageViewType::e3D; |  | ||||||
|     case Tegra::Shader::TextureType::TextureCube: |  | ||||||
|         return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return ImageViewType::e2D; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { |  | ||||||
|     switch (entry.type) { |  | ||||||
|     case Tegra::Shader::ImageType::Texture1D: |  | ||||||
|         return ImageViewType::e1D; |  | ||||||
|     case Tegra::Shader::ImageType::Texture1DArray: |  | ||||||
|         return ImageViewType::e1DArray; |  | ||||||
|     case Tegra::Shader::ImageType::Texture2D: |  | ||||||
|         return ImageViewType::e2D; |  | ||||||
|     case Tegra::Shader::ImageType::Texture2DArray: |  | ||||||
|         return ImageViewType::e2DArray; |  | ||||||
|     case Tegra::Shader::ImageType::Texture3D: |  | ||||||
|         return ImageViewType::e3D; |  | ||||||
|     case Tegra::Shader::ImageType::TextureBuffer: |  | ||||||
|         return ImageViewType::Buffer; |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return ImageViewType::e2D; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, |  | ||||||
|                           VKUpdateDescriptorQueue& update_descriptor_queue, |  | ||||||
|                           ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { |  | ||||||
|     for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { |  | ||||||
|         const ImageViewId image_view_id = *image_view_id_ptr++; |  | ||||||
|         const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|         update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : entries.samplers) { |  | ||||||
|         for (size_t i = 0; i < entry.size; ++i) { |  | ||||||
|             const VkSampler sampler = *sampler_ptr++; |  | ||||||
|             const ImageViewId image_view_id = *image_view_id_ptr++; |  | ||||||
|             const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|             const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); |  | ||||||
|             update_descriptor_queue.AddSampledImage(handle, sampler); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     for ([[maybe_unused]] const auto& entry : entries.storage_texels) { |  | ||||||
|         const ImageViewId image_view_id = *image_view_id_ptr++; |  | ||||||
|         const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|         update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); |  | ||||||
|     } |  | ||||||
|     for (const auto& entry : entries.images) { |  | ||||||
|         // TODO: Mark as modified
 |  | ||||||
|         const ImageViewId image_view_id = *image_view_id_ptr++; |  | ||||||
|         const ImageView& image_view = texture_cache.GetImageView(image_view_id); |  | ||||||
|         const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); |  | ||||||
|         update_descriptor_queue.AddImage(handle); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | ||||||
|                           bool is_indexed) { |                           bool is_indexed) { | ||||||
|     DrawParams params{ |     DrawParams params{ | ||||||
|  | @ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|                      descriptor_pool, update_descriptor_queue), |                      descriptor_pool, update_descriptor_queue), | ||||||
|       query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |       query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||||
|       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |       wfi_event(device.GetLogical().CreateEvent()) { | ||||||
|     scheduler.SetQueryCache(query_cache); |     scheduler.SetQueryCache(query_cache); | ||||||
|     if (device.UseAsynchronousShaders()) { |  | ||||||
|         async_shaders.AllocateWorkers(); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| RasterizerVulkan::~RasterizerVulkan() = default; | RasterizerVulkan::~RasterizerVulkan() = default; | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | ||||||
|     MICROPROFILE_SCOPE(Vulkan_Drawing); |     UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); | ||||||
| 
 |  | ||||||
|     SCOPE_EXIT({ gpu.TickWork(); }); |  | ||||||
|     FlushWork(); |  | ||||||
| 
 |  | ||||||
|     query_cache.UpdateCounters(); |  | ||||||
| 
 |  | ||||||
|     graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); |  | ||||||
| 
 |  | ||||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |  | ||||||
| 
 |  | ||||||
|     texture_cache.SynchronizeGraphicsDescriptors(); |  | ||||||
|     texture_cache.UpdateRenderTargets(false); |  | ||||||
| 
 |  | ||||||
|     const auto shaders = pipeline_cache.GetShaders(); |  | ||||||
|     graphics_key.shaders = GetShaderAddresses(shaders); |  | ||||||
| 
 |  | ||||||
|     SetupShaderDescriptors(shaders, is_indexed); |  | ||||||
| 
 |  | ||||||
|     const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |  | ||||||
|     graphics_key.renderpass = framebuffer->RenderPass(); |  | ||||||
| 
 |  | ||||||
|     VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( |  | ||||||
|         graphics_key, framebuffer->NumColorBuffers(), async_shaders); |  | ||||||
|     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { |  | ||||||
|         // Async graphics pipeline was not ready.
 |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     BeginTransformFeedback(); |  | ||||||
| 
 |  | ||||||
|     scheduler.RequestRenderpass(framebuffer); |  | ||||||
|     scheduler.BindGraphicsPipeline(pipeline->GetHandle()); |  | ||||||
|     UpdateDynamicStates(); |  | ||||||
| 
 |  | ||||||
|     const auto& regs = maxwell3d.regs; |  | ||||||
|     const u32 num_instances = maxwell3d.mme_draw.instance_count; |  | ||||||
|     const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); |  | ||||||
|     const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); |  | ||||||
|     const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); |  | ||||||
|     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |  | ||||||
|         if (descriptor_set) { |  | ||||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |  | ||||||
|                                       DESCRIPTOR_SET, descriptor_set, nullptr); |  | ||||||
|         } |  | ||||||
|         if (draw_params.is_indexed) { |  | ||||||
|             cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, |  | ||||||
|                                draw_params.base_vertex, draw_params.base_instance); |  | ||||||
|         } else { |  | ||||||
|             cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, |  | ||||||
|                         draw_params.base_vertex, draw_params.base_instance); |  | ||||||
|         } |  | ||||||
|     }); |  | ||||||
| 
 |  | ||||||
|     EndTransformFeedback(); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::Clear() { | void RasterizerVulkan::Clear() { | ||||||
|  | @ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | void RasterizerVulkan::DispatchCompute() { | ||||||
|     MICROPROFILE_SCOPE(Vulkan_Compute); |     UNREACHABLE_MSG("Not implemented"); | ||||||
| 
 |  | ||||||
|     query_cache.UpdateCounters(); |  | ||||||
| 
 |  | ||||||
|     const auto& launch_desc = kepler_compute.launch_description; |  | ||||||
|     auto& pipeline = pipeline_cache.GetComputePipeline({ |  | ||||||
|         .shader = code_addr, |  | ||||||
|         .shared_memory_size = launch_desc.shared_alloc, |  | ||||||
|         .workgroup_size{ |  | ||||||
|             launch_desc.block_dim_x, |  | ||||||
|             launch_desc.block_dim_y, |  | ||||||
|             launch_desc.block_dim_z, |  | ||||||
|         }, |  | ||||||
|     }); |  | ||||||
| 
 |  | ||||||
|     // Compute dispatches can't be executed inside a renderpass
 |  | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |  | ||||||
| 
 |  | ||||||
|     image_view_indices.clear(); |  | ||||||
|     sampler_handles.clear(); |  | ||||||
| 
 |  | ||||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |  | ||||||
| 
 |  | ||||||
|     const auto& entries = pipeline.GetEntries(); |  | ||||||
|     buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); |  | ||||||
|     buffer_cache.UnbindComputeStorageBuffers(); |  | ||||||
|     u32 ssbo_index = 0; |  | ||||||
|     for (const auto& buffer : entries.global_buffers) { |  | ||||||
|         buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, |  | ||||||
|                                               buffer.is_written); |  | ||||||
|         ++ssbo_index; |  | ||||||
|     } |  | ||||||
|     buffer_cache.UpdateComputeBuffers(); |  | ||||||
| 
 |  | ||||||
|     texture_cache.SynchronizeComputeDescriptors(); |  | ||||||
| 
 |  | ||||||
|     SetupComputeUniformTexels(entries); |  | ||||||
|     SetupComputeTextures(entries); |  | ||||||
|     SetupComputeStorageTexels(entries); |  | ||||||
|     SetupComputeImages(entries); |  | ||||||
| 
 |  | ||||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |  | ||||||
|     texture_cache.FillComputeImageViews(indices_span, image_view_ids); |  | ||||||
| 
 |  | ||||||
|     update_descriptor_queue.Acquire(); |  | ||||||
| 
 |  | ||||||
|     buffer_cache.BindHostComputeBuffers(); |  | ||||||
| 
 |  | ||||||
|     ImageViewId* image_view_id_ptr = image_view_ids.data(); |  | ||||||
|     VkSampler* sampler_ptr = sampler_handles.data(); |  | ||||||
|     PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |  | ||||||
|                          sampler_ptr); |  | ||||||
| 
 |  | ||||||
|     const VkPipeline pipeline_handle = pipeline.GetHandle(); |  | ||||||
|     const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); |  | ||||||
|     const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); |  | ||||||
|     scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, |  | ||||||
|                       grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, |  | ||||||
|                       descriptor_set](vk::CommandBuffer cmdbuf) { |  | ||||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); |  | ||||||
|         if (descriptor_set) { |  | ||||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, |  | ||||||
|                                       DESCRIPTOR_SET, descriptor_set, nullptr); |  | ||||||
|         } |  | ||||||
|         cmdbuf.Dispatch(grid_x, grid_y, grid_z); |  | ||||||
|     }); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | ||||||
|  | @ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | ||||||
|     return buffer_cache.DMACopy(src_address, dest_address, amount); |     return buffer_cache.DMACopy(src_address, dest_address, amount); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::SetupShaderDescriptors( |  | ||||||
|     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |  | ||||||
|     image_view_indices.clear(); |  | ||||||
|     sampler_handles.clear(); |  | ||||||
|     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |  | ||||||
|         Shader* const shader = shaders[stage + 1]; |  | ||||||
|         if (!shader) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         const ShaderEntries& entries = shader->GetEntries(); |  | ||||||
|         SetupGraphicsUniformTexels(entries, stage); |  | ||||||
|         SetupGraphicsTextures(entries, stage); |  | ||||||
|         SetupGraphicsStorageTexels(entries, stage); |  | ||||||
|         SetupGraphicsImages(entries, stage); |  | ||||||
| 
 |  | ||||||
|         buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); |  | ||||||
|         buffer_cache.UnbindGraphicsStorageBuffers(stage); |  | ||||||
|         u32 ssbo_index = 0; |  | ||||||
|         for (const auto& buffer : entries.global_buffers) { |  | ||||||
|             buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, |  | ||||||
|                                                    buffer.cbuf_offset, buffer.is_written); |  | ||||||
|             ++ssbo_index; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |  | ||||||
|     buffer_cache.UpdateGraphicsBuffers(is_indexed); |  | ||||||
|     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |  | ||||||
| 
 |  | ||||||
|     buffer_cache.BindHostGeometryBuffers(is_indexed); |  | ||||||
| 
 |  | ||||||
|     update_descriptor_queue.Acquire(); |  | ||||||
| 
 |  | ||||||
|     ImageViewId* image_view_id_ptr = image_view_ids.data(); |  | ||||||
|     VkSampler* sampler_ptr = sampler_handles.data(); |  | ||||||
|     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |  | ||||||
|         // Skip VertexA stage
 |  | ||||||
|         Shader* const shader = shaders[stage + 1]; |  | ||||||
|         if (!shader) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         buffer_cache.BindHostStageBuffers(stage); |  | ||||||
|         PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, |  | ||||||
|                              image_view_id_ptr, sampler_ptr); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::UpdateDynamicStates() { | void RasterizerVulkan::UpdateDynamicStates() { | ||||||
|     auto& regs = maxwell3d.regs; |     auto& regs = maxwell3d.regs; | ||||||
|     UpdateViewportsState(regs); |     UpdateViewportsState(regs); | ||||||
|  | @ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { | ||||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |  | ||||||
|     const auto& regs = maxwell3d.regs; |  | ||||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : entries.uniform_texels) { |  | ||||||
|         const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |  | ||||||
|     const auto& regs = maxwell3d.regs; |  | ||||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : entries.samplers) { |  | ||||||
|         for (size_t index = 0; index < entry.size; ++index) { |  | ||||||
|             const TextureHandle handle = |  | ||||||
|                 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); |  | ||||||
|             image_view_indices.push_back(handle.image); |  | ||||||
| 
 |  | ||||||
|             Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); |  | ||||||
|             sampler_handles.push_back(sampler->Handle()); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |  | ||||||
|     const auto& regs = maxwell3d.regs; |  | ||||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : entries.storage_texels) { |  | ||||||
|         const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |  | ||||||
|     const auto& regs = maxwell3d.regs; |  | ||||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |  | ||||||
|     for (const auto& entry : entries.images) { |  | ||||||
|         const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : entries.uniform_texels) { |  | ||||||
|         const TextureHandle handle = |  | ||||||
|             GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : entries.samplers) { |  | ||||||
|         for (size_t index = 0; index < entry.size; ++index) { |  | ||||||
|             const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, |  | ||||||
|                                                         COMPUTE_SHADER_INDEX, index); |  | ||||||
|             image_view_indices.push_back(handle.image); |  | ||||||
| 
 |  | ||||||
|             Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); |  | ||||||
|             sampler_handles.push_back(sampler->Handle()); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : entries.storage_texels) { |  | ||||||
|         const TextureHandle handle = |  | ||||||
|             GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |  | ||||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; |  | ||||||
|     for (const auto& entry : entries.images) { |  | ||||||
|         const TextureHandle handle = |  | ||||||
|             GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); |  | ||||||
|         image_view_indices.push_back(handle.image); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | ||||||
|     if (!state_tracker.TouchViewports()) { |     if (!state_tracker.TouchViewports()) { | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
|  | @ -28,7 +28,6 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/shader/async_shaders.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
|  | @ -73,7 +72,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     void Draw(bool is_indexed, bool is_instanced) override; |     void Draw(bool is_indexed, bool is_instanced) override; | ||||||
|     void Clear() override; |     void Clear() override; | ||||||
|     void DispatchCompute(GPUVAddr code_addr) override; |     void DispatchCompute() override; | ||||||
|     void ResetCounter(VideoCore::QueryType type) override; |     void ResetCounter(VideoCore::QueryType type) override; | ||||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|  | @ -103,19 +102,6 @@ public: | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
| 
 | 
 | ||||||
|     VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { |  | ||||||
|         return async_shaders; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { |  | ||||||
|         return async_shaders; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Maximum supported size that a constbuffer can have in bytes.
 |  | ||||||
|     static constexpr size_t MaxConstbufferSize = 0x10000; |  | ||||||
|     static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, |  | ||||||
|                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); |  | ||||||
| 
 |  | ||||||
| private: | private: | ||||||
|     static constexpr size_t MAX_TEXTURES = 192; |     static constexpr size_t MAX_TEXTURES = 192; | ||||||
|     static constexpr size_t MAX_IMAGES = 48; |     static constexpr size_t MAX_IMAGES = 48; | ||||||
|  | @ -125,40 +111,12 @@ private: | ||||||
| 
 | 
 | ||||||
|     void FlushWork(); |     void FlushWork(); | ||||||
| 
 | 
 | ||||||
|     /// Setup descriptors in the graphics pipeline.
 |  | ||||||
|     void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, |  | ||||||
|                                 bool is_indexed); |  | ||||||
| 
 |  | ||||||
|     void UpdateDynamicStates(); |     void UpdateDynamicStates(); | ||||||
| 
 | 
 | ||||||
|     void BeginTransformFeedback(); |     void BeginTransformFeedback(); | ||||||
| 
 | 
 | ||||||
|     void EndTransformFeedback(); |     void EndTransformFeedback(); | ||||||
| 
 | 
 | ||||||
|     /// Setup uniform texels in the graphics pipeline.
 |  | ||||||
|     void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |  | ||||||
| 
 |  | ||||||
|     /// Setup textures in the graphics pipeline.
 |  | ||||||
|     void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); |  | ||||||
| 
 |  | ||||||
|     /// Setup storage texels in the graphics pipeline.
 |  | ||||||
|     void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); |  | ||||||
| 
 |  | ||||||
|     /// Setup images in the graphics pipeline.
 |  | ||||||
|     void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |  | ||||||
| 
 |  | ||||||
|     /// Setup texel buffers in the compute pipeline.
 |  | ||||||
|     void SetupComputeUniformTexels(const ShaderEntries& entries); |  | ||||||
| 
 |  | ||||||
|     /// Setup textures in the compute pipeline.
 |  | ||||||
|     void SetupComputeTextures(const ShaderEntries& entries); |  | ||||||
| 
 |  | ||||||
|     /// Setup storage texels in the compute pipeline.
 |  | ||||||
|     void SetupComputeStorageTexels(const ShaderEntries& entries); |  | ||||||
| 
 |  | ||||||
|     /// Setup images in the compute pipeline.
 |  | ||||||
|     void SetupComputeImages(const ShaderEntries& entries); |  | ||||||
| 
 |  | ||||||
|     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
|     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
|     void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
|  | @ -198,13 +156,12 @@ private: | ||||||
|     TextureCache texture_cache; |     TextureCache texture_cache; | ||||||
|     BufferCacheRuntime buffer_cache_runtime; |     BufferCacheRuntime buffer_cache_runtime; | ||||||
|     BufferCache buffer_cache; |     BufferCache buffer_cache; | ||||||
|     VKPipelineCache pipeline_cache; |     PipelineCache pipeline_cache; | ||||||
|     VKQueryCache query_cache; |     VKQueryCache query_cache; | ||||||
|     AccelerateDMA accelerate_dma; |     AccelerateDMA accelerate_dma; | ||||||
|     VKFenceManager fence_manager; |     VKFenceManager fence_manager; | ||||||
| 
 | 
 | ||||||
|     vk::Event wfi_event; |     vk::Event wfi_event; | ||||||
|     VideoCommon::Shader::AsyncShaders async_shaders; |  | ||||||
| 
 | 
 | ||||||
|     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | ||||||
|     std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |     std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | ||||||
|  |  | ||||||
|  | @ -1,752 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <string> |  | ||||||
| #include <string_view> |  | ||||||
| 
 |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/ast.h" |  | ||||||
| #include "video_core/shader/expr.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| ASTZipper::ASTZipper() = default; |  | ||||||
| 
 |  | ||||||
| void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { |  | ||||||
|     ASSERT(new_first->manager == nullptr); |  | ||||||
|     first = new_first; |  | ||||||
|     last = new_first; |  | ||||||
| 
 |  | ||||||
|     ASTNode current = first; |  | ||||||
|     while (current) { |  | ||||||
|         current->manager = this; |  | ||||||
|         current->parent = parent; |  | ||||||
|         last = current; |  | ||||||
|         current = current->next; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::PushBack(const ASTNode new_node) { |  | ||||||
|     ASSERT(new_node->manager == nullptr); |  | ||||||
|     new_node->previous = last; |  | ||||||
|     if (last) { |  | ||||||
|         last->next = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->next.reset(); |  | ||||||
|     last = new_node; |  | ||||||
|     if (!first) { |  | ||||||
|         first = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->manager = this; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::PushFront(const ASTNode new_node) { |  | ||||||
|     ASSERT(new_node->manager == nullptr); |  | ||||||
|     new_node->previous.reset(); |  | ||||||
|     new_node->next = first; |  | ||||||
|     if (first) { |  | ||||||
|         first->previous = new_node; |  | ||||||
|     } |  | ||||||
|     if (last == first) { |  | ||||||
|         last = new_node; |  | ||||||
|     } |  | ||||||
|     first = new_node; |  | ||||||
|     new_node->manager = this; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { |  | ||||||
|     ASSERT(new_node->manager == nullptr); |  | ||||||
|     if (!at_node) { |  | ||||||
|         PushFront(new_node); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const ASTNode next = at_node->next; |  | ||||||
|     if (next) { |  | ||||||
|         next->previous = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->previous = at_node; |  | ||||||
|     if (at_node == last) { |  | ||||||
|         last = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->next = next; |  | ||||||
|     at_node->next = new_node; |  | ||||||
|     new_node->manager = this; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { |  | ||||||
|     ASSERT(new_node->manager == nullptr); |  | ||||||
|     if (!at_node) { |  | ||||||
|         PushBack(new_node); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const ASTNode previous = at_node->previous; |  | ||||||
|     if (previous) { |  | ||||||
|         previous->next = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->next = at_node; |  | ||||||
|     if (at_node == first) { |  | ||||||
|         first = new_node; |  | ||||||
|     } |  | ||||||
|     new_node->previous = previous; |  | ||||||
|     at_node->previous = new_node; |  | ||||||
|     new_node->manager = this; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::DetachTail(ASTNode node) { |  | ||||||
|     ASSERT(node->manager == this); |  | ||||||
|     if (node == first) { |  | ||||||
|         first.reset(); |  | ||||||
|         last.reset(); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     last = node->previous; |  | ||||||
|     last->next.reset(); |  | ||||||
|     node->previous.reset(); |  | ||||||
| 
 |  | ||||||
|     ASTNode current = std::move(node); |  | ||||||
|     while (current) { |  | ||||||
|         current->manager = nullptr; |  | ||||||
|         current->parent.reset(); |  | ||||||
|         current = current->next; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { |  | ||||||
|     ASSERT(start->manager == this && end->manager == this); |  | ||||||
|     if (start == end) { |  | ||||||
|         DetachSingle(start); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const ASTNode prev = start->previous; |  | ||||||
|     const ASTNode post = end->next; |  | ||||||
|     if (!prev) { |  | ||||||
|         first = post; |  | ||||||
|     } else { |  | ||||||
|         prev->next = post; |  | ||||||
|     } |  | ||||||
|     if (!post) { |  | ||||||
|         last = prev; |  | ||||||
|     } else { |  | ||||||
|         post->previous = prev; |  | ||||||
|     } |  | ||||||
|     start->previous.reset(); |  | ||||||
|     end->next.reset(); |  | ||||||
|     ASTNode current = start; |  | ||||||
|     bool found = false; |  | ||||||
|     while (current) { |  | ||||||
|         current->manager = nullptr; |  | ||||||
|         current->parent.reset(); |  | ||||||
|         found |= current == end; |  | ||||||
|         current = current->next; |  | ||||||
|     } |  | ||||||
|     ASSERT(found); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::DetachSingle(const ASTNode node) { |  | ||||||
|     ASSERT(node->manager == this); |  | ||||||
|     const ASTNode prev = node->previous; |  | ||||||
|     const ASTNode post = node->next; |  | ||||||
|     node->previous.reset(); |  | ||||||
|     node->next.reset(); |  | ||||||
|     if (!prev) { |  | ||||||
|         first = post; |  | ||||||
|     } else { |  | ||||||
|         prev->next = post; |  | ||||||
|     } |  | ||||||
|     if (!post) { |  | ||||||
|         last = prev; |  | ||||||
|     } else { |  | ||||||
|         post->previous = prev; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     node->manager = nullptr; |  | ||||||
|     node->parent.reset(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTZipper::Remove(const ASTNode node) { |  | ||||||
|     ASSERT(node->manager == this); |  | ||||||
|     const ASTNode next = node->next; |  | ||||||
|     const ASTNode previous = node->previous; |  | ||||||
|     if (previous) { |  | ||||||
|         previous->next = next; |  | ||||||
|     } |  | ||||||
|     if (next) { |  | ||||||
|         next->previous = previous; |  | ||||||
|     } |  | ||||||
|     node->parent.reset(); |  | ||||||
|     node->manager = nullptr; |  | ||||||
|     if (node == last) { |  | ||||||
|         last = previous; |  | ||||||
|     } |  | ||||||
|     if (node == first) { |  | ||||||
|         first = next; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| class ExprPrinter final { |  | ||||||
| public: |  | ||||||
|     void operator()(const ExprAnd& expr) { |  | ||||||
|         inner += "( "; |  | ||||||
|         std::visit(*this, *expr.operand1); |  | ||||||
|         inner += " && "; |  | ||||||
|         std::visit(*this, *expr.operand2); |  | ||||||
|         inner += ')'; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprOr& expr) { |  | ||||||
|         inner += "( "; |  | ||||||
|         std::visit(*this, *expr.operand1); |  | ||||||
|         inner += " || "; |  | ||||||
|         std::visit(*this, *expr.operand2); |  | ||||||
|         inner += ')'; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprNot& expr) { |  | ||||||
|         inner += "!"; |  | ||||||
|         std::visit(*this, *expr.operand1); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprPredicate& expr) { |  | ||||||
|         inner += fmt::format("P{}", expr.predicate); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprCondCode& expr) { |  | ||||||
|         inner += fmt::format("CC{}", expr.cc); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprVar& expr) { |  | ||||||
|         inner += fmt::format("V{}", expr.var_index); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprBoolean& expr) { |  | ||||||
|         inner += expr.value ? "true" : "false"; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ExprGprEqual& expr) { |  | ||||||
|         inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::string& GetResult() const { |  | ||||||
|         return inner; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     std::string inner; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTPrinter { |  | ||||||
| public: |  | ||||||
|     void operator()(const ASTProgram& ast) { |  | ||||||
|         scope++; |  | ||||||
|         inner += "program {\n"; |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|         inner += "}\n"; |  | ||||||
|         scope--; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTIfThen& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); |  | ||||||
|         scope++; |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|         scope--; |  | ||||||
|         inner += fmt::format("{}}}\n", Indent()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTIfElse& ast) { |  | ||||||
|         inner += Indent(); |  | ||||||
|         inner += "else {\n"; |  | ||||||
| 
 |  | ||||||
|         scope++; |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|         scope--; |  | ||||||
| 
 |  | ||||||
|         inner += Indent(); |  | ||||||
|         inner += "}\n"; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTBlockEncoded& ast) { |  | ||||||
|         inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { |  | ||||||
|         inner += Indent(); |  | ||||||
|         inner += "Block;\n"; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTVarSet& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTLabel& ast) { |  | ||||||
|         inner += fmt::format("Label_{}:\n", ast.index); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTGoto& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += |  | ||||||
|             fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTDoWhile& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += fmt::format("{}do {{\n", Indent()); |  | ||||||
|         scope++; |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|         scope--; |  | ||||||
|         inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTReturn& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), |  | ||||||
|                              ast.kills ? "discard" : "exit"); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTBreak& ast) { |  | ||||||
|         ExprPrinter expr_parser{}; |  | ||||||
|         std::visit(expr_parser, *ast.condition); |  | ||||||
|         inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void Visit(const ASTNode& node) { |  | ||||||
|         std::visit(*this, *node->GetInnerData()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::string& GetResult() const { |  | ||||||
|         return inner; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     std::string_view Indent() { |  | ||||||
|         if (space_segment_scope == scope) { |  | ||||||
|             return space_segment; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Ensure that we don't exceed our view.
 |  | ||||||
|         ASSERT(scope * 2 < spaces.size()); |  | ||||||
| 
 |  | ||||||
|         space_segment = spaces.substr(0, scope * 2); |  | ||||||
|         space_segment_scope = scope; |  | ||||||
|         return space_segment; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::string inner{}; |  | ||||||
|     std::string_view space_segment; |  | ||||||
| 
 |  | ||||||
|     u32 scope{}; |  | ||||||
|     u32 space_segment_scope{}; |  | ||||||
| 
 |  | ||||||
|     static constexpr std::string_view spaces{"                                    "}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| std::string ASTManager::Print() const { |  | ||||||
|     ASTPrinter printer{}; |  | ||||||
|     printer.Visit(main_node); |  | ||||||
|     return printer.GetResult(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) |  | ||||||
|     : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} |  | ||||||
| 
 |  | ||||||
| ASTManager::~ASTManager() { |  | ||||||
|     Clear(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::Init() { |  | ||||||
|     main_node = ASTBase::Make<ASTProgram>(ASTNode{}); |  | ||||||
|     program = std::get_if<ASTProgram>(main_node->GetInnerData()); |  | ||||||
|     false_condition = MakeExpr<ExprBoolean>(false); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::DeclareLabel(u32 address) { |  | ||||||
|     const auto pair = labels_map.emplace(address, labels_count); |  | ||||||
|     if (pair.second) { |  | ||||||
|         labels_count++; |  | ||||||
|         labels.resize(labels_count); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::InsertLabel(u32 address) { |  | ||||||
|     const u32 index = labels_map[address]; |  | ||||||
|     const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); |  | ||||||
|     labels[index] = label; |  | ||||||
|     program->nodes.PushBack(label); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::InsertGoto(Expr condition, u32 address) { |  | ||||||
|     const u32 index = labels_map[address]; |  | ||||||
|     const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); |  | ||||||
|     gotos.push_back(goto_node); |  | ||||||
|     program->nodes.PushBack(goto_node); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::InsertBlock(u32 start_address, u32 end_address) { |  | ||||||
|     ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); |  | ||||||
|     program->nodes.PushBack(std::move(block)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::InsertReturn(Expr condition, bool kills) { |  | ||||||
|     ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); |  | ||||||
|     program->nodes.PushBack(std::move(node)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| // The decompile algorithm is based on
 |  | ||||||
| // "Taming control flow: A structured approach to eliminating goto statements"
 |  | ||||||
| // by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
 |  | ||||||
| // on the same structured level as the label which they jump to. This is done,
 |  | ||||||
| // through outward/inward movements and lifting. Once they are at the same
 |  | ||||||
| // level, you can enclose them in an "if" structure or a "do-while" structure.
 |  | ||||||
| void ASTManager::Decompile() { |  | ||||||
|     auto it = gotos.begin(); |  | ||||||
|     while (it != gotos.end()) { |  | ||||||
|         const ASTNode goto_node = *it; |  | ||||||
|         const auto label_index = goto_node->GetGotoLabel(); |  | ||||||
|         if (!label_index) { |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         const ASTNode label = labels[*label_index]; |  | ||||||
|         if (!full_decompile) { |  | ||||||
|             // We only decompile backward jumps
 |  | ||||||
|             if (!IsBackwardsJump(goto_node, label)) { |  | ||||||
|                 it++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         if (IndirectlyRelated(goto_node, label)) { |  | ||||||
|             while (!DirectlyRelated(goto_node, label)) { |  | ||||||
|                 MoveOutward(goto_node); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         if (DirectlyRelated(goto_node, label)) { |  | ||||||
|             u32 goto_level = goto_node->GetLevel(); |  | ||||||
|             const u32 label_level = label->GetLevel(); |  | ||||||
|             while (label_level < goto_level) { |  | ||||||
|                 MoveOutward(goto_node); |  | ||||||
|                 goto_level--; |  | ||||||
|             } |  | ||||||
|             // TODO(Blinkhawk): Implement Lifting and Inward Movements
 |  | ||||||
|         } |  | ||||||
|         if (label->GetParent() == goto_node->GetParent()) { |  | ||||||
|             bool is_loop = false; |  | ||||||
|             ASTNode current = goto_node->GetPrevious(); |  | ||||||
|             while (current) { |  | ||||||
|                 if (current == label) { |  | ||||||
|                     is_loop = true; |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|                 current = current->GetPrevious(); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             if (is_loop) { |  | ||||||
|                 EncloseDoWhile(goto_node, label); |  | ||||||
|             } else { |  | ||||||
|                 EncloseIfThen(goto_node, label); |  | ||||||
|             } |  | ||||||
|             it = gotos.erase(it); |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         it++; |  | ||||||
|     } |  | ||||||
|     if (full_decompile) { |  | ||||||
|         for (const ASTNode& label : labels) { |  | ||||||
|             auto& manager = label->GetManager(); |  | ||||||
|             manager.Remove(label); |  | ||||||
|         } |  | ||||||
|         labels.clear(); |  | ||||||
|     } else { |  | ||||||
|         auto label_it = labels.begin(); |  | ||||||
|         while (label_it != labels.end()) { |  | ||||||
|             bool can_remove = true; |  | ||||||
|             ASTNode label = *label_it; |  | ||||||
|             for (const ASTNode& goto_node : gotos) { |  | ||||||
|                 const auto label_index = goto_node->GetGotoLabel(); |  | ||||||
|                 if (!label_index) { |  | ||||||
|                     return; |  | ||||||
|                 } |  | ||||||
|                 ASTNode& glabel = labels[*label_index]; |  | ||||||
|                 if (glabel == label) { |  | ||||||
|                     can_remove = false; |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             if (can_remove) { |  | ||||||
|                 label->MarkLabelUnused(); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { |  | ||||||
|     u32 goto_level = goto_node->GetLevel(); |  | ||||||
|     u32 label_level = label_node->GetLevel(); |  | ||||||
|     while (goto_level > label_level) { |  | ||||||
|         goto_level--; |  | ||||||
|         goto_node = goto_node->GetParent(); |  | ||||||
|     } |  | ||||||
|     while (label_level > goto_level) { |  | ||||||
|         label_level--; |  | ||||||
|         label_node = label_node->GetParent(); |  | ||||||
|     } |  | ||||||
|     while (goto_node->GetParent() != label_node->GetParent()) { |  | ||||||
|         goto_node = goto_node->GetParent(); |  | ||||||
|         label_node = label_node->GetParent(); |  | ||||||
|     } |  | ||||||
|     ASTNode current = goto_node->GetPrevious(); |  | ||||||
|     while (current) { |  | ||||||
|         if (current == label_node) { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         current = current->GetPrevious(); |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { |  | ||||||
|     return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { |  | ||||||
|     if (first->GetParent() == second->GetParent()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     const u32 first_level = first->GetLevel(); |  | ||||||
|     const u32 second_level = second->GetLevel(); |  | ||||||
|     u32 min_level; |  | ||||||
|     u32 max_level; |  | ||||||
|     ASTNode max; |  | ||||||
|     ASTNode min; |  | ||||||
|     if (first_level > second_level) { |  | ||||||
|         min_level = second_level; |  | ||||||
|         min = second; |  | ||||||
|         max_level = first_level; |  | ||||||
|         max = first; |  | ||||||
|     } else { |  | ||||||
|         min_level = first_level; |  | ||||||
|         min = first; |  | ||||||
|         max_level = second_level; |  | ||||||
|         max = second; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     while (max_level > min_level) { |  | ||||||
|         max_level--; |  | ||||||
|         max = max->GetParent(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return min->GetParent() == max->GetParent(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::ShowCurrentState(std::string_view state) const { |  | ||||||
|     LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); |  | ||||||
|     SanityCheck(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::SanityCheck() const { |  | ||||||
|     for (const auto& label : labels) { |  | ||||||
|         if (!label->GetParent()) { |  | ||||||
|             LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { |  | ||||||
|     ASTZipper& zipper = goto_node->GetManager(); |  | ||||||
|     const ASTNode loop_start = label->GetNext(); |  | ||||||
|     if (loop_start == goto_node) { |  | ||||||
|         zipper.Remove(goto_node); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const ASTNode parent = label->GetParent(); |  | ||||||
|     const Expr condition = goto_node->GetGotoCondition(); |  | ||||||
|     zipper.DetachSegment(loop_start, goto_node); |  | ||||||
|     const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); |  | ||||||
|     ASTZipper* sub_zipper = do_while_node->GetSubNodes(); |  | ||||||
|     sub_zipper->Init(loop_start, do_while_node); |  | ||||||
|     zipper.InsertAfter(do_while_node, label); |  | ||||||
|     sub_zipper->Remove(goto_node); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { |  | ||||||
|     ASTZipper& zipper = goto_node->GetManager(); |  | ||||||
|     const ASTNode if_end = label->GetPrevious(); |  | ||||||
|     if (if_end == goto_node) { |  | ||||||
|         zipper.Remove(goto_node); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const ASTNode prev = goto_node->GetPrevious(); |  | ||||||
|     const Expr condition = goto_node->GetGotoCondition(); |  | ||||||
|     bool do_else = false; |  | ||||||
|     if (!disable_else_derivation && prev->IsIfThen()) { |  | ||||||
|         const Expr if_condition = prev->GetIfCondition(); |  | ||||||
|         do_else = ExprAreEqual(if_condition, condition); |  | ||||||
|     } |  | ||||||
|     const ASTNode parent = label->GetParent(); |  | ||||||
|     zipper.DetachSegment(goto_node, if_end); |  | ||||||
|     ASTNode if_node; |  | ||||||
|     if (do_else) { |  | ||||||
|         if_node = ASTBase::Make<ASTIfElse>(parent); |  | ||||||
|     } else { |  | ||||||
|         Expr neg_condition = MakeExprNot(condition); |  | ||||||
|         if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); |  | ||||||
|     } |  | ||||||
|     ASTZipper* sub_zipper = if_node->GetSubNodes(); |  | ||||||
|     sub_zipper->Init(goto_node, if_node); |  | ||||||
|     zipper.InsertAfter(if_node, prev); |  | ||||||
|     sub_zipper->Remove(goto_node); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ASTManager::MoveOutward(ASTNode goto_node) { |  | ||||||
|     ASTZipper& zipper = goto_node->GetManager(); |  | ||||||
|     const ASTNode parent = goto_node->GetParent(); |  | ||||||
|     ASTZipper& zipper2 = parent->GetManager(); |  | ||||||
|     const ASTNode grandpa = parent->GetParent(); |  | ||||||
|     const bool is_loop = parent->IsLoop(); |  | ||||||
|     const bool is_else = parent->IsIfElse(); |  | ||||||
|     const bool is_if = parent->IsIfThen(); |  | ||||||
| 
 |  | ||||||
|     const ASTNode prev = goto_node->GetPrevious(); |  | ||||||
|     const ASTNode post = goto_node->GetNext(); |  | ||||||
| 
 |  | ||||||
|     const Expr condition = goto_node->GetGotoCondition(); |  | ||||||
|     zipper.DetachSingle(goto_node); |  | ||||||
|     if (is_loop) { |  | ||||||
|         const u32 var_index = NewVariable(); |  | ||||||
|         const Expr var_condition = MakeExpr<ExprVar>(var_index); |  | ||||||
|         const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); |  | ||||||
|         const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); |  | ||||||
|         zipper2.InsertBefore(var_node_init, parent); |  | ||||||
|         zipper.InsertAfter(var_node, prev); |  | ||||||
|         goto_node->SetGotoCondition(var_condition); |  | ||||||
|         const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); |  | ||||||
|         zipper.InsertAfter(break_node, var_node); |  | ||||||
|     } else if (is_if || is_else) { |  | ||||||
|         const u32 var_index = NewVariable(); |  | ||||||
|         const Expr var_condition = MakeExpr<ExprVar>(var_index); |  | ||||||
|         const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); |  | ||||||
|         const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); |  | ||||||
|         if (is_if) { |  | ||||||
|             zipper2.InsertBefore(var_node_init, parent); |  | ||||||
|         } else { |  | ||||||
|             zipper2.InsertBefore(var_node_init, parent->GetPrevious()); |  | ||||||
|         } |  | ||||||
|         zipper.InsertAfter(var_node, prev); |  | ||||||
|         goto_node->SetGotoCondition(var_condition); |  | ||||||
|         if (post) { |  | ||||||
|             zipper.DetachTail(post); |  | ||||||
|             const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); |  | ||||||
|             ASTZipper* sub_zipper = if_node->GetSubNodes(); |  | ||||||
|             sub_zipper->Init(post, if_node); |  | ||||||
|             zipper.InsertAfter(if_node, var_node); |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
|     const ASTNode next = parent->GetNext(); |  | ||||||
|     if (is_if && next && next->IsIfElse()) { |  | ||||||
|         zipper2.InsertAfter(goto_node, next); |  | ||||||
|         goto_node->SetParent(grandpa); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     zipper2.InsertAfter(goto_node, parent); |  | ||||||
|     goto_node->SetParent(grandpa); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| class ASTClearer { |  | ||||||
| public: |  | ||||||
|     ASTClearer() = default; |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTProgram& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTIfThen& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTIfElse& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTBlockDecoded& ast) { |  | ||||||
|         ast.nodes.clear(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTVarSet& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTLabel& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTGoto& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(const ASTDoWhile& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTReturn& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()([[maybe_unused]] const ASTBreak& ast) {} |  | ||||||
| 
 |  | ||||||
|     void Visit(const ASTNode& node) { |  | ||||||
|         std::visit(*this, *node->GetInnerData()); |  | ||||||
|         node->Clear(); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| void ASTManager::Clear() { |  | ||||||
|     if (!main_node) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     ASTClearer clearer{}; |  | ||||||
|     clearer.Visit(main_node); |  | ||||||
|     main_node.reset(); |  | ||||||
|     program = nullptr; |  | ||||||
|     labels_map.clear(); |  | ||||||
|     labels.clear(); |  | ||||||
|     gotos.clear(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,398 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <functional> |  | ||||||
| #include <list> |  | ||||||
| #include <memory> |  | ||||||
| #include <optional> |  | ||||||
| #include <string> |  | ||||||
| #include <unordered_map> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "video_core/shader/expr.h" |  | ||||||
| #include "video_core/shader/node.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
// Forward declarations: the node wrapper and every payload kind a node can carry.
class ASTBase;
class ASTBlockDecoded;
class ASTBlockEncoded;
class ASTBreak;
class ASTDoWhile;
class ASTGoto;
class ASTIfElse;
class ASTIfThen;
class ASTLabel;
class ASTProgram;
class ASTReturn;
class ASTVarSet;

// Payload of an AST node. The order of alternatives is part of the type and must not change.
using ASTData = std::variant< //
    ASTProgram,               //
    ASTIfThen,                //
    ASTIfElse,                //
    ASTBlockEncoded,          //
    ASTBlockDecoded,          //
    ASTVarSet,                //
    ASTGoto,                  //
    ASTLabel,                 //
    ASTDoWhile,               //
    ASTReturn,                //
    ASTBreak>;

// Shared handle through which AST nodes are passed around and linked together.
using ASTNode = std::shared_ptr<ASTBase>;
| 
 |  | ||||||
| enum class ASTZipperType : u32 { |  | ||||||
|     Program, |  | ||||||
|     IfThen, |  | ||||||
|     IfElse, |  | ||||||
|     Loop, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTZipper final { |  | ||||||
| public: |  | ||||||
|     explicit ASTZipper(); |  | ||||||
| 
 |  | ||||||
|     void Init(ASTNode first, ASTNode parent); |  | ||||||
| 
 |  | ||||||
|     ASTNode GetFirst() const { |  | ||||||
|         return first; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode GetLast() const { |  | ||||||
|         return last; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void PushBack(ASTNode new_node); |  | ||||||
|     void PushFront(ASTNode new_node); |  | ||||||
|     void InsertAfter(ASTNode new_node, ASTNode at_node); |  | ||||||
|     void InsertBefore(ASTNode new_node, ASTNode at_node); |  | ||||||
|     void DetachTail(ASTNode node); |  | ||||||
|     void DetachSingle(ASTNode node); |  | ||||||
|     void DetachSegment(ASTNode start, ASTNode end); |  | ||||||
|     void Remove(ASTNode node); |  | ||||||
| 
 |  | ||||||
|     ASTNode first; |  | ||||||
|     ASTNode last; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTProgram { |  | ||||||
| public: |  | ||||||
|     ASTZipper nodes{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTIfThen { |  | ||||||
| public: |  | ||||||
|     explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} |  | ||||||
|     Expr condition; |  | ||||||
|     ASTZipper nodes{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTIfElse { |  | ||||||
| public: |  | ||||||
|     ASTZipper nodes{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTBlockEncoded { |  | ||||||
| public: |  | ||||||
|     explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} |  | ||||||
|     u32 start; |  | ||||||
|     u32 end; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTBlockDecoded { |  | ||||||
| public: |  | ||||||
|     explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} |  | ||||||
|     NodeBlock nodes; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTVarSet { |  | ||||||
| public: |  | ||||||
|     explicit ASTVarSet(u32 index_, Expr condition_) |  | ||||||
|         : index{index_}, condition{std::move(condition_)} {} |  | ||||||
| 
 |  | ||||||
|     u32 index; |  | ||||||
|     Expr condition; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTLabel { |  | ||||||
| public: |  | ||||||
|     explicit ASTLabel(u32 index_) : index{index_} {} |  | ||||||
|     u32 index; |  | ||||||
|     bool unused{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTGoto { |  | ||||||
| public: |  | ||||||
|     explicit ASTGoto(Expr condition_, u32 label_) |  | ||||||
|         : condition{std::move(condition_)}, label{label_} {} |  | ||||||
| 
 |  | ||||||
|     Expr condition; |  | ||||||
|     u32 label; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTDoWhile { |  | ||||||
| public: |  | ||||||
|     explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} |  | ||||||
|     Expr condition; |  | ||||||
|     ASTZipper nodes{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTReturn { |  | ||||||
| public: |  | ||||||
|     explicit ASTReturn(Expr condition_, bool kills_) |  | ||||||
|         : condition{std::move(condition_)}, kills{kills_} {} |  | ||||||
| 
 |  | ||||||
|     Expr condition; |  | ||||||
|     bool kills; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTBreak { |  | ||||||
| public: |  | ||||||
|     explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} |  | ||||||
|     Expr condition; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTBase { |  | ||||||
| public: |  | ||||||
|     explicit ASTBase(ASTNode parent_, ASTData data_) |  | ||||||
|         : data{std::move(data_)}, parent{std::move(parent_)} {} |  | ||||||
| 
 |  | ||||||
|     template <class U, class... Args> |  | ||||||
|     static ASTNode Make(ASTNode parent, Args&&... args) { |  | ||||||
|         return std::make_shared<ASTBase>(std::move(parent), |  | ||||||
|                                          ASTData(U(std::forward<Args>(args)...))); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void SetParent(ASTNode new_parent) { |  | ||||||
|         parent = std::move(new_parent); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode& GetParent() { |  | ||||||
|         return parent; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ASTNode& GetParent() const { |  | ||||||
|         return parent; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetLevel() const { |  | ||||||
|         u32 level = 0; |  | ||||||
|         auto next_parent = parent; |  | ||||||
|         while (next_parent) { |  | ||||||
|             next_parent = next_parent->GetParent(); |  | ||||||
|             level++; |  | ||||||
|         } |  | ||||||
|         return level; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTData* GetInnerData() { |  | ||||||
|         return &data; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ASTData* GetInnerData() const { |  | ||||||
|         return &data; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode GetNext() const { |  | ||||||
|         return next; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode GetPrevious() const { |  | ||||||
|         return previous; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTZipper& GetManager() { |  | ||||||
|         return *manager; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ASTZipper& GetManager() const { |  | ||||||
|         return *manager; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::optional<u32> GetGotoLabel() const { |  | ||||||
|         if (const auto* inner = std::get_if<ASTGoto>(&data)) { |  | ||||||
|             return {inner->label}; |  | ||||||
|         } |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Expr GetGotoCondition() const { |  | ||||||
|         if (const auto* inner = std::get_if<ASTGoto>(&data)) { |  | ||||||
|             return inner->condition; |  | ||||||
|         } |  | ||||||
|         return nullptr; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void MarkLabelUnused() { |  | ||||||
|         if (auto* inner = std::get_if<ASTLabel>(&data)) { |  | ||||||
|             inner->unused = true; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsLabelUnused() const { |  | ||||||
|         if (const auto* inner = std::get_if<ASTLabel>(&data)) { |  | ||||||
|             return inner->unused; |  | ||||||
|         } |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::optional<u32> GetLabelIndex() const { |  | ||||||
|         if (const auto* inner = std::get_if<ASTLabel>(&data)) { |  | ||||||
|             return {inner->index}; |  | ||||||
|         } |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Expr GetIfCondition() const { |  | ||||||
|         if (const auto* inner = std::get_if<ASTIfThen>(&data)) { |  | ||||||
|             return inner->condition; |  | ||||||
|         } |  | ||||||
|         return nullptr; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void SetGotoCondition(Expr new_condition) { |  | ||||||
|         if (auto* inner = std::get_if<ASTGoto>(&data)) { |  | ||||||
|             inner->condition = std::move(new_condition); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsIfThen() const { |  | ||||||
|         return std::holds_alternative<ASTIfThen>(data); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsIfElse() const { |  | ||||||
|         return std::holds_alternative<ASTIfElse>(data); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsBlockEncoded() const { |  | ||||||
|         return std::holds_alternative<ASTBlockEncoded>(data); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void TransformBlockEncoded(NodeBlock&& nodes) { |  | ||||||
|         data = ASTBlockDecoded(std::move(nodes)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsLoop() const { |  | ||||||
|         return std::holds_alternative<ASTDoWhile>(data); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTZipper* GetSubNodes() { |  | ||||||
|         if (std::holds_alternative<ASTProgram>(data)) { |  | ||||||
|             return &std::get_if<ASTProgram>(&data)->nodes; |  | ||||||
|         } |  | ||||||
|         if (std::holds_alternative<ASTIfThen>(data)) { |  | ||||||
|             return &std::get_if<ASTIfThen>(&data)->nodes; |  | ||||||
|         } |  | ||||||
|         if (std::holds_alternative<ASTIfElse>(data)) { |  | ||||||
|             return &std::get_if<ASTIfElse>(&data)->nodes; |  | ||||||
|         } |  | ||||||
|         if (std::holds_alternative<ASTDoWhile>(data)) { |  | ||||||
|             return &std::get_if<ASTDoWhile>(&data)->nodes; |  | ||||||
|         } |  | ||||||
|         return nullptr; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void Clear() { |  | ||||||
|         next.reset(); |  | ||||||
|         previous.reset(); |  | ||||||
|         parent.reset(); |  | ||||||
|         manager = nullptr; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     friend class ASTZipper; |  | ||||||
| 
 |  | ||||||
|     ASTData data; |  | ||||||
|     ASTNode parent; |  | ||||||
|     ASTNode next; |  | ||||||
|     ASTNode previous; |  | ||||||
|     ASTZipper* manager{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ASTManager final { |  | ||||||
| public: |  | ||||||
|     explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); |  | ||||||
|     ~ASTManager(); |  | ||||||
| 
 |  | ||||||
|     ASTManager(const ASTManager& o) = delete; |  | ||||||
|     ASTManager& operator=(const ASTManager& other) = delete; |  | ||||||
| 
 |  | ||||||
|     ASTManager(ASTManager&& other) noexcept = default; |  | ||||||
|     ASTManager& operator=(ASTManager&& other) noexcept = default; |  | ||||||
| 
 |  | ||||||
|     void Init(); |  | ||||||
| 
 |  | ||||||
|     void DeclareLabel(u32 address); |  | ||||||
| 
 |  | ||||||
|     void InsertLabel(u32 address); |  | ||||||
| 
 |  | ||||||
|     void InsertGoto(Expr condition, u32 address); |  | ||||||
| 
 |  | ||||||
|     void InsertBlock(u32 start_address, u32 end_address); |  | ||||||
| 
 |  | ||||||
|     void InsertReturn(Expr condition, bool kills); |  | ||||||
| 
 |  | ||||||
|     std::string Print() const; |  | ||||||
| 
 |  | ||||||
|     void Decompile(); |  | ||||||
| 
 |  | ||||||
|     void ShowCurrentState(std::string_view state) const; |  | ||||||
| 
 |  | ||||||
|     void SanityCheck() const; |  | ||||||
| 
 |  | ||||||
|     void Clear(); |  | ||||||
| 
 |  | ||||||
|     bool IsFullyDecompiled() const { |  | ||||||
|         if (full_decompile) { |  | ||||||
|             return gotos.empty(); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         for (ASTNode goto_node : gotos) { |  | ||||||
|             auto label_index = goto_node->GetGotoLabel(); |  | ||||||
|             if (!label_index) { |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|             ASTNode glabel = labels[*label_index]; |  | ||||||
|             if (IsBackwardsJump(goto_node, glabel)) { |  | ||||||
|                 return false; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode GetProgram() const { |  | ||||||
|         return main_node; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetVariables() const { |  | ||||||
|         return variables; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::vector<ASTNode>& GetLabels() const { |  | ||||||
|         return labels; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; |  | ||||||
| 
 |  | ||||||
|     bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; |  | ||||||
| 
 |  | ||||||
|     bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; |  | ||||||
| 
 |  | ||||||
|     void EncloseDoWhile(ASTNode goto_node, ASTNode label); |  | ||||||
| 
 |  | ||||||
|     void EncloseIfThen(ASTNode goto_node, ASTNode label); |  | ||||||
| 
 |  | ||||||
|     void MoveOutward(ASTNode goto_node); |  | ||||||
| 
 |  | ||||||
|     u32 NewVariable() { |  | ||||||
|         return variables++; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool full_decompile{}; |  | ||||||
|     bool disable_else_derivation{}; |  | ||||||
|     std::unordered_map<u32, u32> labels_map{}; |  | ||||||
|     u32 labels_count{}; |  | ||||||
|     std::vector<ASTNode> labels{}; |  | ||||||
|     std::list<ASTNode> gotos{}; |  | ||||||
|     u32 variables{}; |  | ||||||
|     ASTProgram* program{}; |  | ||||||
|     ASTNode main_node{}; |  | ||||||
|     Expr false_condition{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,234 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <condition_variable> |  | ||||||
| #include <mutex> |  | ||||||
| #include <thread> |  | ||||||
| #include <vector> |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/renderer_base.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_cache.h" |  | ||||||
| #include "video_core/shader/async_shaders.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} |  | ||||||
| 
 |  | ||||||
| AsyncShaders::~AsyncShaders() { |  | ||||||
|     KillWorkers(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::AllocateWorkers() { |  | ||||||
|     // Use at least one thread
 |  | ||||||
|     u32 num_workers = 1; |  | ||||||
| 
 |  | ||||||
|     // Deduce how many more threads we can use
 |  | ||||||
|     const u32 thread_count = std::thread::hardware_concurrency(); |  | ||||||
|     if (thread_count >= 8) { |  | ||||||
|         // Increase async workers by 1 for every 2 threads >= 8
 |  | ||||||
|         num_workers += 1 + (thread_count - 8) / 2; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // If we already have workers queued, ignore
 |  | ||||||
|     if (num_workers == worker_threads.size()) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // If workers already exist, clear them
 |  | ||||||
|     if (!worker_threads.empty()) { |  | ||||||
|         FreeWorkers(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Create workers
 |  | ||||||
|     for (std::size_t i = 0; i < num_workers; i++) { |  | ||||||
|         context_list.push_back(emu_window.CreateSharedContext()); |  | ||||||
|         worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, |  | ||||||
|                                     context_list[i].get()); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::FreeWorkers() { |  | ||||||
|     // Mark all threads to quit
 |  | ||||||
|     is_thread_exiting.store(true); |  | ||||||
|     cv.notify_all(); |  | ||||||
|     for (auto& thread : worker_threads) { |  | ||||||
|         thread.join(); |  | ||||||
|     } |  | ||||||
|     // Clear our shared contexts
 |  | ||||||
|     context_list.clear(); |  | ||||||
| 
 |  | ||||||
|     // Clear our worker threads
 |  | ||||||
|     worker_threads.clear(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::KillWorkers() { |  | ||||||
|     is_thread_exiting.store(true); |  | ||||||
|     cv.notify_all(); |  | ||||||
|     for (auto& thread : worker_threads) { |  | ||||||
|         thread.detach(); |  | ||||||
|     } |  | ||||||
|     // Clear our shared contexts
 |  | ||||||
|     context_list.clear(); |  | ||||||
| 
 |  | ||||||
|     // Clear our worker threads
 |  | ||||||
|     worker_threads.clear(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool AsyncShaders::HasWorkQueued() const { |  | ||||||
|     return !pending_queue.empty(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool AsyncShaders::HasCompletedWork() const { |  | ||||||
|     std::shared_lock lock{completed_mutex}; |  | ||||||
|     return !finished_work.empty(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { |  | ||||||
|     const auto& regs = gpu.Maxwell3D().regs; |  | ||||||
| 
 |  | ||||||
|     // If something is using depth, we can assume that games are not rendering anything which will
 |  | ||||||
|     // be used one time.
 |  | ||||||
|     if (regs.zeta_enable) { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // If games are using a small index count, we can assume these are full screen quads. Usually
 |  | ||||||
|     // these shaders are only used once for building textures so we can assume they can't be built
 |  | ||||||
|     // async
 |  | ||||||
|     if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { |  | ||||||
|     std::vector<Result> results; |  | ||||||
|     { |  | ||||||
|         std::unique_lock lock{completed_mutex}; |  | ||||||
|         results = std::move(finished_work); |  | ||||||
|         finished_work.clear(); |  | ||||||
|     } |  | ||||||
|     return results; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, |  | ||||||
|                                      Tegra::Engines::ShaderType shader_type, u64 uid, |  | ||||||
|                                      std::vector<u64> code, std::vector<u64> code_b, |  | ||||||
|                                      u32 main_offset, CompilerSettings compiler_settings, |  | ||||||
|                                      const Registry& registry, VAddr cpu_addr) { |  | ||||||
|     std::unique_lock lock(queue_mutex); |  | ||||||
|     pending_queue.push({ |  | ||||||
|         .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, |  | ||||||
|         .device = &device, |  | ||||||
|         .shader_type = shader_type, |  | ||||||
|         .uid = uid, |  | ||||||
|         .code = std::move(code), |  | ||||||
|         .code_b = std::move(code_b), |  | ||||||
|         .main_offset = main_offset, |  | ||||||
|         .compiler_settings = compiler_settings, |  | ||||||
|         .registry = registry, |  | ||||||
|         .cpu_address = cpu_addr, |  | ||||||
|         .pp_cache = nullptr, |  | ||||||
|         .vk_device = nullptr, |  | ||||||
|         .scheduler = nullptr, |  | ||||||
|         .descriptor_pool = nullptr, |  | ||||||
|         .update_descriptor_queue = nullptr, |  | ||||||
|         .bindings{}, |  | ||||||
|         .program{}, |  | ||||||
|         .key{}, |  | ||||||
|         .num_color_buffers = 0, |  | ||||||
|     }); |  | ||||||
|     cv.notify_one(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, |  | ||||||
|                                      const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, |  | ||||||
|                                      Vulkan::VKDescriptorPool& descriptor_pool, |  | ||||||
|                                      Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |  | ||||||
|                                      std::vector<VkDescriptorSetLayoutBinding> bindings, |  | ||||||
|                                      Vulkan::SPIRVProgram program, |  | ||||||
|                                      Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { |  | ||||||
|     std::unique_lock lock(queue_mutex); |  | ||||||
|     pending_queue.push({ |  | ||||||
|         .backend = Backend::Vulkan, |  | ||||||
|         .device = nullptr, |  | ||||||
|         .shader_type{}, |  | ||||||
|         .uid = 0, |  | ||||||
|         .code{}, |  | ||||||
|         .code_b{}, |  | ||||||
|         .main_offset = 0, |  | ||||||
|         .compiler_settings{}, |  | ||||||
|         .registry{}, |  | ||||||
|         .cpu_address = 0, |  | ||||||
|         .pp_cache = pp_cache, |  | ||||||
|         .vk_device = &device, |  | ||||||
|         .scheduler = &scheduler, |  | ||||||
|         .descriptor_pool = &descriptor_pool, |  | ||||||
|         .update_descriptor_queue = &update_descriptor_queue, |  | ||||||
|         .bindings = std::move(bindings), |  | ||||||
|         .program = std::move(program), |  | ||||||
|         .key = key, |  | ||||||
|         .num_color_buffers = num_color_buffers, |  | ||||||
|     }); |  | ||||||
|     cv.notify_one(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { |  | ||||||
|     while (!is_thread_exiting.load(std::memory_order_relaxed)) { |  | ||||||
|         std::unique_lock lock{queue_mutex}; |  | ||||||
|         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); |  | ||||||
|         if (is_thread_exiting) { |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Partial lock to allow all threads to read at the same time
 |  | ||||||
|         if (!HasWorkQueued()) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         // Another thread beat us, just unlock and wait for the next load
 |  | ||||||
|         if (pending_queue.empty()) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Pull work from queue
 |  | ||||||
|         WorkerParams work = std::move(pending_queue.front()); |  | ||||||
|         pending_queue.pop(); |  | ||||||
|         lock.unlock(); |  | ||||||
| 
 |  | ||||||
|         if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { |  | ||||||
|             const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); |  | ||||||
|             const auto scope = context->Acquire(); |  | ||||||
|             auto program = |  | ||||||
|                 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); |  | ||||||
|             Result result{}; |  | ||||||
|             result.backend = work.backend; |  | ||||||
|             result.cpu_address = work.cpu_address; |  | ||||||
|             result.uid = work.uid; |  | ||||||
|             result.code = std::move(work.code); |  | ||||||
|             result.code_b = std::move(work.code_b); |  | ||||||
|             result.shader_type = work.shader_type; |  | ||||||
| 
 |  | ||||||
|             if (work.backend == Backend::OpenGL) { |  | ||||||
|                 result.program.opengl = std::move(program->source_program); |  | ||||||
|             } else if (work.backend == Backend::GLASM) { |  | ||||||
|                 result.program.glasm = std::move(program->assembly_program); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             { |  | ||||||
|                 std::unique_lock complete_lock(completed_mutex); |  | ||||||
|                 finished_work.push_back(std::move(result)); |  | ||||||
|             } |  | ||||||
|         } else if (work.backend == Backend::Vulkan) { |  | ||||||
|             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( |  | ||||||
|                 *work.vk_device, *work.scheduler, *work.descriptor_pool, |  | ||||||
|                 *work.update_descriptor_queue, work.key, work.bindings, work.program, |  | ||||||
|                 work.num_color_buffers); |  | ||||||
| 
 |  | ||||||
|             work.pp_cache->EmplacePipeline(std::move(pipeline)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,138 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <condition_variable> |  | ||||||
| #include <memory> |  | ||||||
| #include <shared_mutex> |  | ||||||
| #include <thread> |  | ||||||
| 
 |  | ||||||
| #include <glad/glad.h> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_device.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" |  | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" |  | ||||||
| 
 |  | ||||||
| namespace Core::Frontend { |  | ||||||
| class EmuWindow; |  | ||||||
| class GraphicsContext; |  | ||||||
| } // namespace Core::Frontend
 |  | ||||||
| 
 |  | ||||||
| namespace Tegra { |  | ||||||
| class GPU; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace Vulkan { |  | ||||||
| class VKPipelineCache; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| class AsyncShaders { |  | ||||||
| public: |  | ||||||
|     enum class Backend { |  | ||||||
|         OpenGL, |  | ||||||
|         GLASM, |  | ||||||
|         Vulkan, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     struct ResultPrograms { |  | ||||||
|         OpenGL::OGLProgram opengl; |  | ||||||
|         OpenGL::OGLAssemblyProgram glasm; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     struct Result { |  | ||||||
|         u64 uid; |  | ||||||
|         VAddr cpu_address; |  | ||||||
|         Backend backend; |  | ||||||
|         ResultPrograms program; |  | ||||||
|         std::vector<u64> code; |  | ||||||
|         std::vector<u64> code_b; |  | ||||||
|         Tegra::Engines::ShaderType shader_type; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); |  | ||||||
|     ~AsyncShaders(); |  | ||||||
| 
 |  | ||||||
|     /// Start up shader worker threads
 |  | ||||||
|     void AllocateWorkers(); |  | ||||||
| 
 |  | ||||||
|     /// Clear the shader queue and kill all worker threads
 |  | ||||||
|     void FreeWorkers(); |  | ||||||
| 
 |  | ||||||
|     // Force end all threads
 |  | ||||||
|     void KillWorkers(); |  | ||||||
| 
 |  | ||||||
|     /// Check to see if any shaders have actually been compiled
 |  | ||||||
|     [[nodiscard]] bool HasCompletedWork() const; |  | ||||||
| 
 |  | ||||||
|     /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
 |  | ||||||
|     /// every shader async as some shaders are only built and executed once. We try to "guess" which
 |  | ||||||
|     /// shader would be used only once
 |  | ||||||
|     [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; |  | ||||||
| 
 |  | ||||||
|     /// Pulls completed compiled shaders
 |  | ||||||
|     [[nodiscard]] std::vector<Result> GetCompletedWork(); |  | ||||||
| 
 |  | ||||||
|     void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, |  | ||||||
|                            u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, |  | ||||||
|                            CompilerSettings compiler_settings, const Registry& registry, |  | ||||||
|                            VAddr cpu_addr); |  | ||||||
| 
 |  | ||||||
|     void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, |  | ||||||
|                            Vulkan::VKScheduler& scheduler, |  | ||||||
|                            Vulkan::VKDescriptorPool& descriptor_pool, |  | ||||||
|                            Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, |  | ||||||
|                            std::vector<VkDescriptorSetLayoutBinding> bindings, |  | ||||||
|                            Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, |  | ||||||
|                            u32 num_color_buffers); |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); |  | ||||||
| 
 |  | ||||||
|     /// Check our worker queue to see if we have any work queued already
 |  | ||||||
|     [[nodiscard]] bool HasWorkQueued() const; |  | ||||||
| 
 |  | ||||||
|     struct WorkerParams { |  | ||||||
|         Backend backend; |  | ||||||
|         // For OGL
 |  | ||||||
|         const OpenGL::Device* device; |  | ||||||
|         Tegra::Engines::ShaderType shader_type; |  | ||||||
|         u64 uid; |  | ||||||
|         std::vector<u64> code; |  | ||||||
|         std::vector<u64> code_b; |  | ||||||
|         u32 main_offset; |  | ||||||
|         CompilerSettings compiler_settings; |  | ||||||
|         std::optional<Registry> registry; |  | ||||||
|         VAddr cpu_address; |  | ||||||
| 
 |  | ||||||
|         // For Vulkan
 |  | ||||||
|         Vulkan::VKPipelineCache* pp_cache; |  | ||||||
|         const Vulkan::Device* vk_device; |  | ||||||
|         Vulkan::VKScheduler* scheduler; |  | ||||||
|         Vulkan::VKDescriptorPool* descriptor_pool; |  | ||||||
|         Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; |  | ||||||
|         std::vector<VkDescriptorSetLayoutBinding> bindings; |  | ||||||
|         Vulkan::SPIRVProgram program; |  | ||||||
|         Vulkan::GraphicsPipelineCacheKey key; |  | ||||||
|         u32 num_color_buffers; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::condition_variable cv; |  | ||||||
|     mutable std::mutex queue_mutex; |  | ||||||
|     mutable std::shared_mutex completed_mutex; |  | ||||||
|     std::atomic<bool> is_thread_exiting{}; |  | ||||||
|     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; |  | ||||||
|     std::vector<std::thread> worker_threads; |  | ||||||
|     std::queue<WorkerParams> pending_queue; |  | ||||||
|     std::vector<Result> finished_work; |  | ||||||
|     Core::Frontend::EmuWindow& emu_window; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,26 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "video_core/shader/compiler_settings.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| std::string CompileDepthAsString(const CompileDepth cd) { |  | ||||||
|     switch (cd) { |  | ||||||
|     case CompileDepth::BruteForce: |  | ||||||
|         return "Brute Force Compile"; |  | ||||||
|     case CompileDepth::FlowStack: |  | ||||||
|         return "Simple Flow Stack Mode"; |  | ||||||
|     case CompileDepth::NoFlowStack: |  | ||||||
|         return "Remove Flow Stack"; |  | ||||||
|     case CompileDepth::DecompileBackwards: |  | ||||||
|         return "Decompile Backward Jumps"; |  | ||||||
|     case CompileDepth::FullDecompile: |  | ||||||
|         return "Full Decompilation"; |  | ||||||
|     default: |  | ||||||
|         return "Unknown Compiler Process"; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,26 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| enum class CompileDepth : u32 { |  | ||||||
|     BruteForce = 0, |  | ||||||
|     FlowStack = 1, |  | ||||||
|     NoFlowStack = 2, |  | ||||||
|     DecompileBackwards = 3, |  | ||||||
|     FullDecompile = 4, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| std::string CompileDepthAsString(CompileDepth cd); |  | ||||||
| 
 |  | ||||||
| struct CompilerSettings { |  | ||||||
|     CompileDepth depth{CompileDepth::NoFlowStack}; |  | ||||||
|     bool disable_else_derivation{true}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,751 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <list> |  | ||||||
| #include <map> |  | ||||||
| #include <set> |  | ||||||
| #include <stack> |  | ||||||
| #include <unordered_map> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/ast.h" |  | ||||||
| #include "video_core/shader/control_flow.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| constexpr s32 unassigned_branch = -2; |  | ||||||
| 
 |  | ||||||
| struct Query { |  | ||||||
|     u32 address{}; |  | ||||||
|     std::stack<u32> ssy_stack{}; |  | ||||||
|     std::stack<u32> pbk_stack{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct BlockStack { |  | ||||||
|     BlockStack() = default; |  | ||||||
|     explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} |  | ||||||
|     std::stack<u32> ssy_stack{}; |  | ||||||
|     std::stack<u32> pbk_stack{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <typename T, typename... Args> |  | ||||||
| BlockBranchInfo MakeBranchInfo(Args&&... args) { |  | ||||||
|     static_assert(std::is_convertible_v<T, BranchData>); |  | ||||||
|     return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool BlockBranchIsIgnored(BlockBranchInfo first) { |  | ||||||
|     bool ignore = false; |  | ||||||
|     if (std::holds_alternative<SingleBranch>(*first)) { |  | ||||||
|         const auto branch = std::get_if<SingleBranch>(first.get()); |  | ||||||
|         ignore = branch->ignore; |  | ||||||
|     } |  | ||||||
|     return ignore; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| struct BlockInfo { |  | ||||||
|     u32 start{}; |  | ||||||
|     u32 end{}; |  | ||||||
|     bool visited{}; |  | ||||||
|     BlockBranchInfo branch{}; |  | ||||||
| 
 |  | ||||||
|     bool IsInside(const u32 address) const { |  | ||||||
|         return start <= address && address <= end; |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct CFGRebuildState { |  | ||||||
|     explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) |  | ||||||
|         : program_code{program_code_}, registry{registry_}, start{start_} {} |  | ||||||
| 
 |  | ||||||
|     const ProgramCode& program_code; |  | ||||||
|     Registry& registry; |  | ||||||
|     u32 start{}; |  | ||||||
|     std::vector<BlockInfo> block_info; |  | ||||||
|     std::list<u32> inspect_queries; |  | ||||||
|     std::list<Query> queries; |  | ||||||
|     std::unordered_map<u32, u32> registered; |  | ||||||
|     std::set<u32> labels; |  | ||||||
|     std::map<u32, u32> ssy_labels; |  | ||||||
|     std::map<u32, u32> pbk_labels; |  | ||||||
|     std::unordered_map<u32, BlockStack> stacks; |  | ||||||
|     ASTManager* manager{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| enum class BlockCollision : u32 { None, Found, Inside }; |  | ||||||
| 
 |  | ||||||
| std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { |  | ||||||
|     const auto& blocks = state.block_info; |  | ||||||
|     for (u32 index = 0; index < blocks.size(); index++) { |  | ||||||
|         if (blocks[index].start == address) { |  | ||||||
|             return {BlockCollision::Found, index}; |  | ||||||
|         } |  | ||||||
|         if (blocks[index].IsInside(address)) { |  | ||||||
|             return {BlockCollision::Inside, index}; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return {BlockCollision::None, 0xFFFFFFFF}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| struct ParseInfo { |  | ||||||
|     BlockBranchInfo branch_info{}; |  | ||||||
|     u32 end_address{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { |  | ||||||
|     auto& it = state.block_info.emplace_back(); |  | ||||||
|     it.start = start; |  | ||||||
|     it.end = end; |  | ||||||
|     const u32 index = static_cast<u32>(state.block_info.size() - 1); |  | ||||||
|     state.registered.insert({start, index}); |  | ||||||
|     return it; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Pred GetPredicate(u32 index, bool negated) { |  | ||||||
|     return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| enum class ParseResult : u32 { |  | ||||||
|     ControlCaught, |  | ||||||
|     BlockEnd, |  | ||||||
|     AbnormalFlow, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct BranchIndirectInfo { |  | ||||||
|     u32 buffer{}; |  | ||||||
|     u32 offset{}; |  | ||||||
|     u32 entries{}; |  | ||||||
|     s32 relative_position{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct BufferInfo { |  | ||||||
|     u32 index; |  | ||||||
|     u32 offset; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { |  | ||||||
|     const Instruction instr = state.program_code[pos]; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
|     if (opcode->get().GetId() != OpCode::Id::BRX) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     if (instr.brx.constant_buffer != 0) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     --pos; |  | ||||||
|     return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <typename Result, typename TestCallable, typename PackCallable> |  | ||||||
| // requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
 |  | ||||||
| // requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
 |  | ||||||
| std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, |  | ||||||
|                                        PackCallable pack) { |  | ||||||
|     for (; pos >= state.start; --pos) { |  | ||||||
|         if (IsSchedInstruction(pos, state.start)) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         const Instruction instr = state.program_code[pos]; |  | ||||||
|         const auto opcode = OpCode::Decode(instr); |  | ||||||
|         if (!opcode) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (test(instr, opcode->get())) { |  | ||||||
|             --pos; |  | ||||||
|             return std::make_optional(pack(instr, opcode->get())); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return std::nullopt; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, |  | ||||||
|                                                    u64 brx_tracked_register) { |  | ||||||
|     return TrackInstruction<std::pair<BufferInfo, u64>>( |  | ||||||
|         state, pos, |  | ||||||
|         [brx_tracked_register](auto instr, const auto& opcode) { |  | ||||||
|             return opcode.GetId() == OpCode::Id::LD_C && |  | ||||||
|                    instr.gpr0.Value() == brx_tracked_register && |  | ||||||
|                    instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; |  | ||||||
|         }, |  | ||||||
|         [](auto instr, const auto& opcode) { |  | ||||||
|             const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), |  | ||||||
|                                      static_cast<u32>(instr.cbuf36.GetOffset())}; |  | ||||||
|             return std::make_pair(info, instr.gpr8.Value()); |  | ||||||
|         }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, |  | ||||||
|                                     u64 ldc_tracked_register) { |  | ||||||
|     return TrackInstruction<u64>( |  | ||||||
|         state, pos, |  | ||||||
|         [ldc_tracked_register](auto instr, const auto& opcode) { |  | ||||||
|             return opcode.GetId() == OpCode::Id::SHL_IMM && |  | ||||||
|                    instr.gpr0.Value() == ldc_tracked_register; |  | ||||||
|         }, |  | ||||||
|         [](auto instr, const auto&) { return instr.gpr8.Value(); }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, |  | ||||||
|                                    u64 shl_tracked_register) { |  | ||||||
|     return TrackInstruction<u32>( |  | ||||||
|         state, pos, |  | ||||||
|         [shl_tracked_register](auto instr, const auto& opcode) { |  | ||||||
|             return opcode.GetId() == OpCode::Id::IMNMX_IMM && |  | ||||||
|                    instr.gpr0.Value() == shl_tracked_register; |  | ||||||
|         }, |  | ||||||
|         [](auto instr, const auto&) { |  | ||||||
|             return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); |  | ||||||
|         }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { |  | ||||||
|     const auto brx_info = GetBRXInfo(state, pos); |  | ||||||
|     if (!brx_info) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const auto [relative_position, brx_tracked_register] = *brx_info; |  | ||||||
| 
 |  | ||||||
|     const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); |  | ||||||
|     if (!ldc_info) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const auto [buffer_info, ldc_tracked_register] = *ldc_info; |  | ||||||
| 
 |  | ||||||
|     const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); |  | ||||||
|     if (!shl_tracked_register) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); |  | ||||||
|     if (!entries) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { |  | ||||||
|     u32 offset = static_cast<u32>(address); |  | ||||||
|     const u32 end_address = static_cast<u32>(state.program_code.size()); |  | ||||||
|     ParseInfo parse_info{}; |  | ||||||
|     SingleBranch single_branch{}; |  | ||||||
| 
 |  | ||||||
|     const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { |  | ||||||
|         const auto pair = rebuild_state.labels.emplace(label_address); |  | ||||||
|         if (pair.second) { |  | ||||||
|             rebuild_state.inspect_queries.push_back(label_address); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     while (true) { |  | ||||||
|         if (offset >= end_address) { |  | ||||||
|             // ASSERT_OR_EXECUTE can't be used, as it ignores the break
 |  | ||||||
|             ASSERT_MSG(false, "Shader passed the current limit!"); |  | ||||||
| 
 |  | ||||||
|             single_branch.address = exit_branch; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         if (state.registered.contains(offset)) { |  | ||||||
|             single_branch.address = offset; |  | ||||||
|             single_branch.ignore = true; |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         if (IsSchedInstruction(offset, state.start)) { |  | ||||||
|             offset++; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         const Instruction instr = {state.program_code[offset]}; |  | ||||||
|         const auto opcode = OpCode::Decode(instr); |  | ||||||
|         if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { |  | ||||||
|             offset++; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::EXIT: { |  | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
|             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |  | ||||||
|             if (single_branch.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|             single_branch.condition.cc = cc; |  | ||||||
|             if (cc == ConditionCode::F) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             single_branch.address = exit_branch; |  | ||||||
|             single_branch.kill = false; |  | ||||||
|             single_branch.is_sync = false; |  | ||||||
|             single_branch.is_brk = false; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|                 single_branch.condition, single_branch.address, single_branch.kill, |  | ||||||
|                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::BRA: { |  | ||||||
|             if (instr.bra.constant_buffer != 0) { |  | ||||||
|                 return {ParseResult::AbnormalFlow, parse_info}; |  | ||||||
|             } |  | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
|             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |  | ||||||
|             if (single_branch.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|             single_branch.condition.cc = cc; |  | ||||||
|             if (cc == ConditionCode::F) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const u32 branch_offset = offset + instr.bra.GetBranchTarget(); |  | ||||||
|             if (branch_offset == 0) { |  | ||||||
|                 single_branch.address = exit_branch; |  | ||||||
|             } else { |  | ||||||
|                 single_branch.address = branch_offset; |  | ||||||
|             } |  | ||||||
|             insert_label(state, branch_offset); |  | ||||||
|             single_branch.kill = false; |  | ||||||
|             single_branch.is_sync = false; |  | ||||||
|             single_branch.is_brk = false; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|                 single_branch.condition, single_branch.address, single_branch.kill, |  | ||||||
|                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::SYNC: { |  | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
|             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |  | ||||||
|             if (single_branch.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|             single_branch.condition.cc = cc; |  | ||||||
|             if (cc == ConditionCode::F) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             single_branch.address = unassigned_branch; |  | ||||||
|             single_branch.kill = false; |  | ||||||
|             single_branch.is_sync = true; |  | ||||||
|             single_branch.is_brk = false; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|                 single_branch.condition, single_branch.address, single_branch.kill, |  | ||||||
|                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::BRK: { |  | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
|             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |  | ||||||
|             if (single_branch.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|             single_branch.condition.cc = cc; |  | ||||||
|             if (cc == ConditionCode::F) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             single_branch.address = unassigned_branch; |  | ||||||
|             single_branch.kill = false; |  | ||||||
|             single_branch.is_sync = false; |  | ||||||
|             single_branch.is_brk = true; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|                 single_branch.condition, single_branch.address, single_branch.kill, |  | ||||||
|                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::KIL: { |  | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
|             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |  | ||||||
|             if (single_branch.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|             single_branch.condition.cc = cc; |  | ||||||
|             if (cc == ConditionCode::F) { |  | ||||||
|                 offset++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             single_branch.address = exit_branch; |  | ||||||
|             single_branch.kill = true; |  | ||||||
|             single_branch.is_sync = false; |  | ||||||
|             single_branch.is_brk = false; |  | ||||||
|             single_branch.ignore = false; |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|                 single_branch.condition, single_branch.address, single_branch.kill, |  | ||||||
|                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::SSY: { |  | ||||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); |  | ||||||
|             insert_label(state, target); |  | ||||||
|             state.ssy_labels.emplace(offset, target); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::PBK: { |  | ||||||
|             const u32 target = offset + instr.bra.GetBranchTarget(); |  | ||||||
|             insert_label(state, target); |  | ||||||
|             state.pbk_labels.emplace(offset, target); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::BRX: { |  | ||||||
|             const auto tmp = TrackBranchIndirectInfo(state, offset); |  | ||||||
|             if (!tmp) { |  | ||||||
|                 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); |  | ||||||
|                 return {ParseResult::AbnormalFlow, parse_info}; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             const auto result = *tmp; |  | ||||||
|             const s32 pc_target = offset + result.relative_position; |  | ||||||
|             std::vector<CaseBranch> branches; |  | ||||||
|             for (u32 i = 0; i < result.entries; i++) { |  | ||||||
|                 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); |  | ||||||
|                 if (!key) { |  | ||||||
|                     return {ParseResult::AbnormalFlow, parse_info}; |  | ||||||
|                 } |  | ||||||
|                 u32 value = *key; |  | ||||||
|                 u32 target = static_cast<u32>((value >> 3) + pc_target); |  | ||||||
|                 insert_label(state, target); |  | ||||||
|                 branches.emplace_back(value, target); |  | ||||||
|             } |  | ||||||
|             parse_info.end_address = offset; |  | ||||||
|             parse_info.branch_info = MakeBranchInfo<MultiBranch>( |  | ||||||
|                 static_cast<u32>(instr.gpr8.Value()), std::move(branches)); |  | ||||||
| 
 |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         offset++; |  | ||||||
|     } |  | ||||||
|     single_branch.kill = false; |  | ||||||
|     single_branch.is_sync = false; |  | ||||||
|     single_branch.is_brk = false; |  | ||||||
|     parse_info.end_address = offset - 1; |  | ||||||
|     parse_info.branch_info = MakeBranchInfo<SingleBranch>( |  | ||||||
|         single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, |  | ||||||
|         single_branch.is_brk, single_branch.ignore); |  | ||||||
|     return {ParseResult::BlockEnd, parse_info}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool TryInspectAddress(CFGRebuildState& state) { |  | ||||||
|     if (state.inspect_queries.empty()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u32 address = state.inspect_queries.front(); |  | ||||||
|     state.inspect_queries.pop_front(); |  | ||||||
|     const auto [result, block_index] = TryGetBlock(state, address); |  | ||||||
|     switch (result) { |  | ||||||
|     case BlockCollision::Found: { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     case BlockCollision::Inside: { |  | ||||||
|         // This case is the tricky one:
 |  | ||||||
|         // We need to split the block into 2 separate blocks
 |  | ||||||
|         const u32 end = state.block_info[block_index].end; |  | ||||||
|         BlockInfo& new_block = CreateBlockInfo(state, address, end); |  | ||||||
|         BlockInfo& current_block = state.block_info[block_index]; |  | ||||||
|         current_block.end = address - 1; |  | ||||||
|         new_block.branch = std::move(current_block.branch); |  | ||||||
|         BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); |  | ||||||
|         const auto branch = std::get_if<SingleBranch>(forward_branch.get()); |  | ||||||
|         branch->address = address; |  | ||||||
|         branch->ignore = true; |  | ||||||
|         current_block.branch = std::move(forward_branch); |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     const auto [parse_result, parse_info] = ParseCode(state, address); |  | ||||||
|     if (parse_result == ParseResult::AbnormalFlow) { |  | ||||||
|         // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction
 |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); |  | ||||||
|     block_info.branch = parse_info.branch_info; |  | ||||||
|     if (std::holds_alternative<SingleBranch>(*block_info.branch)) { |  | ||||||
|         const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); |  | ||||||
|         if (branch->condition.IsUnconditional()) { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         const u32 fallthrough_address = parse_info.end_address + 1; |  | ||||||
|         state.inspect_queries.push_front(fallthrough_address); |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool TryQuery(CFGRebuildState& state) { |  | ||||||
|     const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, |  | ||||||
|                                   BlockInfo& block) { |  | ||||||
|         auto gather_start = labels.lower_bound(block.start); |  | ||||||
|         const auto gather_end = labels.upper_bound(block.end); |  | ||||||
|         while (gather_start != gather_end) { |  | ||||||
|             cc.push(gather_start->second); |  | ||||||
|             ++gather_start; |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|     if (state.queries.empty()) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Query& q = state.queries.front(); |  | ||||||
|     const u32 block_index = state.registered[q.address]; |  | ||||||
|     BlockInfo& block = state.block_info[block_index]; |  | ||||||
|     // If the block is visited, check if the stacks match, else gather the ssy/pbk
 |  | ||||||
|     // labels into the current stack and look if the branch at the end of the block
 |  | ||||||
|     // consumes a label. Schedule new queries accordingly
 |  | ||||||
|     if (block.visited) { |  | ||||||
|         BlockStack& stack = state.stacks[q.address]; |  | ||||||
|         const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && |  | ||||||
|                               (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); |  | ||||||
|         state.queries.pop_front(); |  | ||||||
|         return all_okay; |  | ||||||
|     } |  | ||||||
|     block.visited = true; |  | ||||||
|     state.stacks.insert_or_assign(q.address, BlockStack{q}); |  | ||||||
| 
 |  | ||||||
|     Query q2(q); |  | ||||||
|     state.queries.pop_front(); |  | ||||||
|     gather_labels(q2.ssy_stack, state.ssy_labels, block); |  | ||||||
|     gather_labels(q2.pbk_stack, state.pbk_labels, block); |  | ||||||
|     if (std::holds_alternative<SingleBranch>(*block.branch)) { |  | ||||||
|         auto* branch = std::get_if<SingleBranch>(block.branch.get()); |  | ||||||
|         if (!branch->condition.IsUnconditional()) { |  | ||||||
|             q2.address = block.end + 1; |  | ||||||
|             state.queries.push_back(q2); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         auto& conditional_query = state.queries.emplace_back(q2); |  | ||||||
|         if (branch->is_sync) { |  | ||||||
|             if (branch->address == unassigned_branch) { |  | ||||||
|                 branch->address = conditional_query.ssy_stack.top(); |  | ||||||
|             } |  | ||||||
|             conditional_query.ssy_stack.pop(); |  | ||||||
|         } |  | ||||||
|         if (branch->is_brk) { |  | ||||||
|             if (branch->address == unassigned_branch) { |  | ||||||
|                 branch->address = conditional_query.pbk_stack.top(); |  | ||||||
|             } |  | ||||||
|             conditional_query.pbk_stack.pop(); |  | ||||||
|         } |  | ||||||
|         conditional_query.address = branch->address; |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); |  | ||||||
|     for (const auto& branch_case : multi_branch->branches) { |  | ||||||
|         auto& conditional_query = state.queries.emplace_back(q2); |  | ||||||
|         conditional_query.address = branch_case.address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return true; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { |  | ||||||
|     const auto get_expr = [](const Condition& cond) -> Expr { |  | ||||||
|         Expr result; |  | ||||||
|         if (cond.cc != ConditionCode::T) { |  | ||||||
|             result = MakeExpr<ExprCondCode>(cond.cc); |  | ||||||
|         } |  | ||||||
|         if (cond.predicate != Pred::UnusedIndex) { |  | ||||||
|             u32 pred = static_cast<u32>(cond.predicate); |  | ||||||
|             bool negate = false; |  | ||||||
|             if (pred > 7) { |  | ||||||
|                 negate = true; |  | ||||||
|                 pred -= 8; |  | ||||||
|             } |  | ||||||
|             Expr extra = MakeExpr<ExprPredicate>(pred); |  | ||||||
|             if (negate) { |  | ||||||
|                 extra = MakeExpr<ExprNot>(std::move(extra)); |  | ||||||
|             } |  | ||||||
|             if (result) { |  | ||||||
|                 return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); |  | ||||||
|             } |  | ||||||
|             return extra; |  | ||||||
|         } |  | ||||||
|         if (result) { |  | ||||||
|             return result; |  | ||||||
|         } |  | ||||||
|         return MakeExpr<ExprBoolean>(true); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     if (std::holds_alternative<SingleBranch>(*branch_info)) { |  | ||||||
|         const auto* branch = std::get_if<SingleBranch>(branch_info.get()); |  | ||||||
|         if (branch->address < 0) { |  | ||||||
|             if (branch->kill) { |  | ||||||
|                 mm.InsertReturn(get_expr(branch->condition), true); |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|             mm.InsertReturn(get_expr(branch->condition), false); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         mm.InsertGoto(get_expr(branch->condition), branch->address); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); |  | ||||||
|     for (const auto& branch_case : multi_branch->branches) { |  | ||||||
|         mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), |  | ||||||
|                       branch_case.address); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void DecompileShader(CFGRebuildState& state) { |  | ||||||
|     state.manager->Init(); |  | ||||||
|     for (auto label : state.labels) { |  | ||||||
|         state.manager->DeclareLabel(label); |  | ||||||
|     } |  | ||||||
|     for (const auto& block : state.block_info) { |  | ||||||
|         if (state.labels.contains(block.start)) { |  | ||||||
|             state.manager->InsertLabel(block.start); |  | ||||||
|         } |  | ||||||
|         const bool ignore = BlockBranchIsIgnored(block.branch); |  | ||||||
|         const u32 end = ignore ? block.end + 1 : block.end; |  | ||||||
|         state.manager->InsertBlock(block.start, end); |  | ||||||
|         if (!ignore) { |  | ||||||
|             InsertBranch(*state.manager, block.branch); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     state.manager->Decompile(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |  | ||||||
|                                                 const CompilerSettings& settings, |  | ||||||
|                                                 Registry& registry) { |  | ||||||
|     auto result_out = std::make_unique<ShaderCharacteristics>(); |  | ||||||
|     if (settings.depth == CompileDepth::BruteForce) { |  | ||||||
|         result_out->settings.depth = CompileDepth::BruteForce; |  | ||||||
|         return result_out; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     CFGRebuildState state{program_code, start_address, registry}; |  | ||||||
|     // Inspect Code and generate blocks
 |  | ||||||
|     state.labels.clear(); |  | ||||||
|     state.labels.emplace(start_address); |  | ||||||
|     state.inspect_queries.push_back(state.start); |  | ||||||
|     while (!state.inspect_queries.empty()) { |  | ||||||
|         if (!TryInspectAddress(state)) { |  | ||||||
|             result_out->settings.depth = CompileDepth::BruteForce; |  | ||||||
|             return result_out; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool use_flow_stack = true; |  | ||||||
| 
 |  | ||||||
|     bool decompiled = false; |  | ||||||
| 
 |  | ||||||
|     if (settings.depth != CompileDepth::FlowStack) { |  | ||||||
|         // Decompile Stacks
 |  | ||||||
|         state.queries.push_back(Query{state.start, {}, {}}); |  | ||||||
|         decompiled = true; |  | ||||||
|         while (!state.queries.empty()) { |  | ||||||
|             if (!TryQuery(state)) { |  | ||||||
|                 decompiled = false; |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     use_flow_stack = !decompiled; |  | ||||||
| 
 |  | ||||||
|     // Sort and organize results
 |  | ||||||
|     std::sort(state.block_info.begin(), state.block_info.end(), |  | ||||||
|               [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); |  | ||||||
|     if (decompiled && settings.depth != CompileDepth::NoFlowStack) { |  | ||||||
|         ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, |  | ||||||
|                            settings.disable_else_derivation}; |  | ||||||
|         state.manager = &manager; |  | ||||||
|         DecompileShader(state); |  | ||||||
|         decompiled = state.manager->IsFullyDecompiled(); |  | ||||||
|         if (!decompiled) { |  | ||||||
|             if (settings.depth == CompileDepth::FullDecompile) { |  | ||||||
|                 LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); |  | ||||||
|             } else { |  | ||||||
|                 LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); |  | ||||||
|             } |  | ||||||
|             state.manager->ShowCurrentState("Of Shader"); |  | ||||||
|             state.manager->Clear(); |  | ||||||
|         } else { |  | ||||||
|             auto characteristics = std::make_unique<ShaderCharacteristics>(); |  | ||||||
|             characteristics->start = start_address; |  | ||||||
|             characteristics->settings.depth = settings.depth; |  | ||||||
|             characteristics->manager = std::move(manager); |  | ||||||
|             characteristics->end = state.block_info.back().end + 1; |  | ||||||
|             return characteristics; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     result_out->start = start_address; |  | ||||||
|     result_out->settings.depth = |  | ||||||
|         use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; |  | ||||||
|     result_out->blocks.clear(); |  | ||||||
|     for (auto& block : state.block_info) { |  | ||||||
|         ShaderBlock new_block{}; |  | ||||||
|         new_block.start = block.start; |  | ||||||
|         new_block.end = block.end; |  | ||||||
|         new_block.ignore_branch = BlockBranchIsIgnored(block.branch); |  | ||||||
|         if (!new_block.ignore_branch) { |  | ||||||
|             new_block.branch = block.branch; |  | ||||||
|         } |  | ||||||
|         result_out->end = std::max(result_out->end, block.end); |  | ||||||
|         result_out->blocks.push_back(new_block); |  | ||||||
|     } |  | ||||||
|     if (!use_flow_stack) { |  | ||||||
|         result_out->labels = std::move(state.labels); |  | ||||||
|         return result_out; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     auto back = result_out->blocks.begin(); |  | ||||||
|     auto next = std::next(back); |  | ||||||
|     while (next != result_out->blocks.end()) { |  | ||||||
|         if (!state.labels.contains(next->start) && next->start == back->end + 1) { |  | ||||||
|             back->end = next->end; |  | ||||||
|             next = result_out->blocks.erase(next); |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         back = next; |  | ||||||
|         ++next; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return result_out; |  | ||||||
| } |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,117 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <list> |  | ||||||
| #include <optional> |  | ||||||
| #include <set> |  | ||||||
| #include <variant> |  | ||||||
| 
 |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/ast.h" |  | ||||||
| #include "video_core/shader/compiler_settings.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::ConditionCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| constexpr s32 exit_branch = -1; |  | ||||||
| 
 |  | ||||||
| struct Condition { |  | ||||||
|     Pred predicate{Pred::UnusedIndex}; |  | ||||||
|     ConditionCode cc{ConditionCode::T}; |  | ||||||
| 
 |  | ||||||
|     bool IsUnconditional() const { |  | ||||||
|         return predicate == Pred::UnusedIndex && cc == ConditionCode::T; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator==(const Condition& other) const { |  | ||||||
|         return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const Condition& other) const { |  | ||||||
|         return !operator==(other); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class SingleBranch { |  | ||||||
| public: |  | ||||||
|     SingleBranch() = default; |  | ||||||
|     explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, |  | ||||||
|                           bool is_brk_, bool ignore_) |  | ||||||
|         : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, |  | ||||||
|           ignore{ignore_} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const SingleBranch& b) const { |  | ||||||
|         return std::tie(condition, address, kill, is_sync, is_brk, ignore) == |  | ||||||
|                std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const SingleBranch& b) const { |  | ||||||
|         return !operator==(b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Condition condition{}; |  | ||||||
|     s32 address{exit_branch}; |  | ||||||
|     bool kill{}; |  | ||||||
|     bool is_sync{}; |  | ||||||
|     bool is_brk{}; |  | ||||||
|     bool ignore{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct CaseBranch { |  | ||||||
|     explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} |  | ||||||
|     u32 cmp_value; |  | ||||||
|     u32 address; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class MultiBranch { |  | ||||||
| public: |  | ||||||
|     explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) |  | ||||||
|         : gpr{gpr_}, branches{std::move(branches_)} {} |  | ||||||
| 
 |  | ||||||
|     u32 gpr{}; |  | ||||||
|     std::vector<CaseBranch> branches{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
// The branch that terminates a block: either a single-target branch or a multi-way branch.
| using BranchData = std::variant<SingleBranch, MultiBranch>; |  | ||||||
// Shared handle to branch data; ShaderBlock stores one of these per block.
| using BlockBranchInfo = std::shared_ptr<BranchData>; |  | ||||||
| 
 |  | ||||||
// Equality test for two branch handles; ShaderBlock::operator== relies on it. Defined elsewhere.
// NOTE(review): presumably compares the pointed-to branch data rather than pointer identity -
// confirm against the definition.
| bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); |  | ||||||
| 
 |  | ||||||
| struct ShaderBlock { |  | ||||||
|     u32 start{}; |  | ||||||
|     u32 end{}; |  | ||||||
|     bool ignore_branch{}; |  | ||||||
|     BlockBranchInfo branch{}; |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ShaderBlock& sb) const { |  | ||||||
|         return std::tie(start, end, ignore_branch) == |  | ||||||
|                    std::tie(sb.start, sb.end, sb.ignore_branch) && |  | ||||||
|                BlockBranchInfoAreEqual(branch, sb.branch); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const ShaderBlock& sb) const { |  | ||||||
|         return !operator==(sb); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct ShaderCharacteristics { |  | ||||||
|     std::list<ShaderBlock> blocks{}; |  | ||||||
|     std::set<u32> labels{}; |  | ||||||
|     u32 start{}; |  | ||||||
|     u32 end{}; |  | ||||||
|     ASTManager manager{true, true}; |  | ||||||
|     CompilerSettings settings{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
// Scans the control flow of `program_code` starting at `start_address` and returns the blocks,
// labels and (when decompilation succeeds) the AST manager describing it. The returned
// settings.depth reports the depth actually achieved, which can differ from settings.depth.
| std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |  | ||||||
|                                                 const CompilerSettings& settings, |  | ||||||
|                                                 Registry& registry); |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,368 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <cstring> |  | ||||||
| #include <limits> |  | ||||||
| #include <set> |  | ||||||
| 
 |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/engines/shader_header.h" |  | ||||||
| #include "video_core/shader/control_flow.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |  | ||||||
|                               const std::list<SamplerEntry>& used_samplers) { |  | ||||||
|     if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     u32 count{}; |  | ||||||
|     std::vector<u32> bound_offsets; |  | ||||||
|     for (const auto& sampler : used_samplers) { |  | ||||||
|         if (sampler.is_bindless) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         ++count; |  | ||||||
|         bound_offsets.emplace_back(sampler.offset); |  | ||||||
|     } |  | ||||||
|     if (count > 1) { |  | ||||||
|         gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, |  | ||||||
|                                         VideoCore::GuestDriverProfile& gpu_driver, |  | ||||||
|                                         const std::list<SamplerEntry>& used_samplers) { |  | ||||||
|     const u32 base_offset = sampler_to_deduce.offset; |  | ||||||
|     u32 max_offset{std::numeric_limits<u32>::max()}; |  | ||||||
|     for (const auto& sampler : used_samplers) { |  | ||||||
|         if (sampler.is_bindless) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (sampler.offset > base_offset) { |  | ||||||
|             max_offset = std::min(sampler.offset, max_offset); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (max_offset == std::numeric_limits<u32>::max()) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| class ASTDecoder { |  | ||||||
| public: |  | ||||||
|     explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTProgram& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTIfThen& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTIfElse& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTBlockEncoded& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTBlockDecoded& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTVarSet& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTLabel& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTGoto& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTDoWhile& ast) { |  | ||||||
|         ASTNode current = ast.nodes.GetFirst(); |  | ||||||
|         while (current) { |  | ||||||
|             Visit(current); |  | ||||||
|             current = current->GetNext(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTReturn& ast) {} |  | ||||||
| 
 |  | ||||||
|     void operator()(ASTBreak& ast) {} |  | ||||||
| 
 |  | ||||||
|     void Visit(ASTNode& node) { |  | ||||||
|         std::visit(*this, *node->GetInnerData()); |  | ||||||
|         if (node->IsBlockEncoded()) { |  | ||||||
|             auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); |  | ||||||
|             NodeBlock bb = ir.DecodeRange(block->start, block->end); |  | ||||||
|             node->TransformBlockEncoded(std::move(bb)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     ShaderIR& ir; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| void ShaderIR::Decode() { |  | ||||||
|     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |  | ||||||
| 
 |  | ||||||
|     decompiled = false; |  | ||||||
|     auto info = ScanFlow(program_code, main_offset, settings, registry); |  | ||||||
|     auto& shader_info = *info; |  | ||||||
|     coverage_begin = shader_info.start; |  | ||||||
|     coverage_end = shader_info.end; |  | ||||||
|     switch (shader_info.settings.depth) { |  | ||||||
|     case CompileDepth::FlowStack: { |  | ||||||
|         for (const auto& block : shader_info.blocks) { |  | ||||||
|             basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case CompileDepth::NoFlowStack: { |  | ||||||
|         disable_flow_stack = true; |  | ||||||
|         const auto insert_block = [this](NodeBlock& nodes, u32 label) { |  | ||||||
|             if (label == static_cast<u32>(exit_branch)) { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|             basic_blocks.insert({label, nodes}); |  | ||||||
|         }; |  | ||||||
|         const auto& blocks = shader_info.blocks; |  | ||||||
|         NodeBlock current_block; |  | ||||||
|         u32 current_label = static_cast<u32>(exit_branch); |  | ||||||
|         for (const auto& block : blocks) { |  | ||||||
|             if (shader_info.labels.contains(block.start)) { |  | ||||||
|                 insert_block(current_block, current_label); |  | ||||||
|                 current_block.clear(); |  | ||||||
|                 current_label = block.start; |  | ||||||
|             } |  | ||||||
|             if (!block.ignore_branch) { |  | ||||||
|                 DecodeRangeInner(current_block, block.start, block.end); |  | ||||||
|                 InsertControlFlow(current_block, block); |  | ||||||
|             } else { |  | ||||||
|                 DecodeRangeInner(current_block, block.start, block.end + 1); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         insert_block(current_block, current_label); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case CompileDepth::DecompileBackwards: |  | ||||||
|     case CompileDepth::FullDecompile: { |  | ||||||
|         program_manager = std::move(shader_info.manager); |  | ||||||
|         disable_flow_stack = true; |  | ||||||
|         decompiled = true; |  | ||||||
|         ASTDecoder decoder{*this}; |  | ||||||
|         ASTNode program = GetASTProgram(); |  | ||||||
|         decoder.Visit(program); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case CompileDepth::BruteForce: { |  | ||||||
|         const auto shader_end = static_cast<u32>(program_code.size()); |  | ||||||
|         coverage_begin = main_offset; |  | ||||||
|         coverage_end = shader_end; |  | ||||||
|         for (u32 label = main_offset; label < shader_end; ++label) { |  | ||||||
|             basic_blocks.insert({label, DecodeRange(label, label + 1)}); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     } |  | ||||||
|     if (settings.depth != shader_info.settings.depth) { |  | ||||||
|         LOG_WARNING( |  | ||||||
|             HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", |  | ||||||
|             CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { |  | ||||||
|     NodeBlock basic_block; |  | ||||||
|     DecodeRangeInner(basic_block, begin, end); |  | ||||||
|     return basic_block; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { |  | ||||||
|     for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { |  | ||||||
|         pc = DecodeInstr(bb, pc); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { |  | ||||||
|     const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { |  | ||||||
|         Node result = n; |  | ||||||
|         if (cond.cc != ConditionCode::T) { |  | ||||||
|             result = Conditional(GetConditionCode(cond.cc), {result}); |  | ||||||
|         } |  | ||||||
|         if (cond.predicate != Pred::UnusedIndex) { |  | ||||||
|             u32 pred = static_cast<u32>(cond.predicate); |  | ||||||
|             const bool is_neg = pred > 7; |  | ||||||
|             if (is_neg) { |  | ||||||
|                 pred -= 8; |  | ||||||
|             } |  | ||||||
|             result = Conditional(GetPredicate(pred, is_neg), {result}); |  | ||||||
|         } |  | ||||||
|         return result; |  | ||||||
|     }; |  | ||||||
|     if (std::holds_alternative<SingleBranch>(*block.branch)) { |  | ||||||
|         auto branch = std::get_if<SingleBranch>(block.branch.get()); |  | ||||||
|         if (branch->address < 0) { |  | ||||||
|             if (branch->kill) { |  | ||||||
|                 Node n = Operation(OperationCode::Discard); |  | ||||||
|                 n = apply_conditions(branch->condition, n); |  | ||||||
|                 bb.push_back(n); |  | ||||||
|                 global_code.push_back(n); |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|             Node n = Operation(OperationCode::Exit); |  | ||||||
|             n = apply_conditions(branch->condition, n); |  | ||||||
|             bb.push_back(n); |  | ||||||
|             global_code.push_back(n); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
|         Node n = Operation(OperationCode::Branch, Immediate(branch->address)); |  | ||||||
|         n = apply_conditions(branch->condition, n); |  | ||||||
|         bb.push_back(n); |  | ||||||
|         global_code.push_back(n); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); |  | ||||||
|     Node op_a = GetRegister(multi_branch->gpr); |  | ||||||
|     for (auto& branch_case : multi_branch->branches) { |  | ||||||
|         Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); |  | ||||||
|         Node op_b = Immediate(branch_case.cmp_value); |  | ||||||
|         Node condition = |  | ||||||
|             GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); |  | ||||||
|         auto result = Conditional(condition, {n}); |  | ||||||
|         bb.push_back(result); |  | ||||||
|         global_code.push_back(result); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |  | ||||||
|     // Ignore sched instructions when generating code.
 |  | ||||||
|     if (IsSchedInstruction(pc, main_offset)) { |  | ||||||
|         return pc + 1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
|     const u32 nv_address = ConvertAddressToNvidiaSpace(pc); |  | ||||||
| 
 |  | ||||||
|     // Decoding failure
 |  | ||||||
|     if (!opcode) { |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); |  | ||||||
|         bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", |  | ||||||
|                                          nv_address, instr.value))); |  | ||||||
|         return pc + 1; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bb.push_back(Comment( |  | ||||||
|         fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); |  | ||||||
| 
 |  | ||||||
|     using Tegra::Shader::Pred; |  | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, |  | ||||||
|                          "NeverExecute predicate not implemented"); |  | ||||||
| 
 |  | ||||||
|     static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { |  | ||||||
|         {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, |  | ||||||
|         {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, |  | ||||||
|         {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, |  | ||||||
|         {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, |  | ||||||
|         {OpCode::Type::Shift, &ShaderIR::DecodeShift}, |  | ||||||
|         {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, |  | ||||||
|         {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, |  | ||||||
|         {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, |  | ||||||
|         {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, |  | ||||||
|         {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, |  | ||||||
|         {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |  | ||||||
|         {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |  | ||||||
|         {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, |  | ||||||
|         {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |  | ||||||
|         {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, |  | ||||||
|         {OpCode::Type::Image, &ShaderIR::DecodeImage}, |  | ||||||
|         {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, |  | ||||||
|         {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, |  | ||||||
|         {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, |  | ||||||
|         {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, |  | ||||||
|         {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, |  | ||||||
|         {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, |  | ||||||
|         {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, |  | ||||||
|         {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, |  | ||||||
|         {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, |  | ||||||
|         {OpCode::Type::Video, &ShaderIR::DecodeVideo}, |  | ||||||
|         {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> tmp_block; |  | ||||||
|     if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { |  | ||||||
|         pc = (this->*decoder->second)(tmp_block, pc); |  | ||||||
|     } else { |  | ||||||
|         pc = DecodeOther(tmp_block, pc); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
 |  | ||||||
|     // executed.
 |  | ||||||
|     const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); |  | ||||||
|     const auto pred_index = static_cast<u32>(instr.pred.pred_index); |  | ||||||
| 
 |  | ||||||
|     if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { |  | ||||||
|         const Node conditional = |  | ||||||
|             Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); |  | ||||||
|         global_code.push_back(conditional); |  | ||||||
|         bb.push_back(conditional); |  | ||||||
|     } else { |  | ||||||
|         for (auto& node : tmp_block) { |  | ||||||
|             global_code.push_back(node); |  | ||||||
|             bb.push_back(node); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc + 1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::PostDecode() { |  | ||||||
|     // Deduce texture handler size if needed
 |  | ||||||
|     auto gpu_driver = registry.AccessGuestDriverProfile(); |  | ||||||
|     DeduceTextureHandlerSize(gpu_driver, used_samplers); |  | ||||||
|     // Deduce Indexed Samplers
 |  | ||||||
|     if (!uses_indexed_samplers) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     for (auto& sampler : used_samplers) { |  | ||||||
|         if (!sampler.is_indexed) { |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { |  | ||||||
|             sampler.size = *size; |  | ||||||
|         } else { |  | ||||||
|             LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); |  | ||||||
|             sampler.size = 1; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,166 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::SubOp; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|     Node op_b = [&] { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return GetImmediate19(instr); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::MOV_C: |  | ||||||
|     case OpCode::Id::MOV_R: { |  | ||||||
|         // MOV has neither 'abs' nor 'neg' bits.
 |  | ||||||
|         SetRegister(bb, instr.gpr0, op_b); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FMUL_C: |  | ||||||
|     case OpCode::Id::FMUL_R: |  | ||||||
|     case OpCode::Id::FMUL_IMM: { |  | ||||||
|         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
 |  | ||||||
|         if (instr.fmul.tab5cb8_2 != 0) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", |  | ||||||
|                       instr.fmul.tab5cb8_2.Value()); |  | ||||||
|         } |  | ||||||
|         if (instr.fmul.tab5c68_0 != 1) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", |  | ||||||
|                       instr.fmul.tab5c68_0.Value()); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |  | ||||||
| 
 |  | ||||||
|         static constexpr std::array FmulPostFactor = { |  | ||||||
|             1.000f, // None
 |  | ||||||
|             0.500f, // Divide 2
 |  | ||||||
|             0.250f, // Divide 4
 |  | ||||||
|             0.125f, // Divide 8
 |  | ||||||
|             8.000f, // Mul 8
 |  | ||||||
|             4.000f, // Mul 4
 |  | ||||||
|             2.000f, // Mul 2
 |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         if (instr.fmul.postfactor != 0) { |  | ||||||
|             op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, |  | ||||||
|                              Immediate(FmulPostFactor[instr.fmul.postfactor])); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // TODO(Rodrigo): Should precise be used when there's a postfactor?
 |  | ||||||
|         Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FADD_C: |  | ||||||
|     case OpCode::Id::FADD_R: |  | ||||||
|     case OpCode::Id::FADD_IMM: { |  | ||||||
|         op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); |  | ||||||
|         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); |  | ||||||
| 
 |  | ||||||
|         Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); |  | ||||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::MUFU: { |  | ||||||
|         op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); |  | ||||||
| 
 |  | ||||||
|         Node value = [&]() { |  | ||||||
|             switch (instr.sub_op) { |  | ||||||
|             case SubOp::Cos: |  | ||||||
|                 return Operation(OperationCode::FCos, PRECISE, op_a); |  | ||||||
|             case SubOp::Sin: |  | ||||||
|                 return Operation(OperationCode::FSin, PRECISE, op_a); |  | ||||||
|             case SubOp::Ex2: |  | ||||||
|                 return Operation(OperationCode::FExp2, PRECISE, op_a); |  | ||||||
|             case SubOp::Lg2: |  | ||||||
|                 return Operation(OperationCode::FLog2, PRECISE, op_a); |  | ||||||
|             case SubOp::Rcp: |  | ||||||
|                 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); |  | ||||||
|             case SubOp::Rsq: |  | ||||||
|                 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); |  | ||||||
|             case SubOp::Sqrt: |  | ||||||
|                 return Operation(OperationCode::FSqrt, PRECISE, op_a); |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FMNMX_C: |  | ||||||
|     case OpCode::Id::FMNMX_R: |  | ||||||
|     case OpCode::Id::FMNMX_IMM: { |  | ||||||
|         op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); |  | ||||||
|         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); |  | ||||||
| 
 |  | ||||||
|         const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); |  | ||||||
| 
 |  | ||||||
|         const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); |  | ||||||
|         const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); |  | ||||||
|         const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FCMP_RR: |  | ||||||
|     case OpCode::Id::FCMP_RC: |  | ||||||
|     case OpCode::Id::FCMP_IMMR: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); |  | ||||||
|         Node op_c = GetRegister(instr.gpr39); |  | ||||||
|         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); |  | ||||||
|         SetRegister( |  | ||||||
|             bb, instr.gpr0, |  | ||||||
|             Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::RRO_C: |  | ||||||
|     case OpCode::Id::RRO_R: |  | ||||||
|     case OpCode::Id::RRO_IMM: { |  | ||||||
|         LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); |  | ||||||
| 
 |  | ||||||
|         // Currently RRO is only implemented as a register move.
 |  | ||||||
|         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); |  | ||||||
|         SetRegister(bb, instr.gpr0, op_b); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,101 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::HalfType; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     bool negate_a = false; |  | ||||||
|     bool negate_b = false; |  | ||||||
|     bool absolute_a = false; |  | ||||||
|     bool absolute_b = false; |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::HADD2_R: |  | ||||||
|         if (instr.alu_half.ftz == 0) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|         } |  | ||||||
|         negate_a = ((instr.value >> 43) & 1) != 0; |  | ||||||
|         negate_b = ((instr.value >> 31) & 1) != 0; |  | ||||||
|         absolute_a = ((instr.value >> 44) & 1) != 0; |  | ||||||
|         absolute_b = ((instr.value >> 30) & 1) != 0; |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HADD2_C: |  | ||||||
|         if (instr.alu_half.ftz == 0) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|         } |  | ||||||
|         negate_a = ((instr.value >> 43) & 1) != 0; |  | ||||||
|         negate_b = ((instr.value >> 56) & 1) != 0; |  | ||||||
|         absolute_a = ((instr.value >> 44) & 1) != 0; |  | ||||||
|         absolute_b = ((instr.value >> 54) & 1) != 0; |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HMUL2_R: |  | ||||||
|         negate_a = ((instr.value >> 43) & 1) != 0; |  | ||||||
|         absolute_a = ((instr.value >> 44) & 1) != 0; |  | ||||||
|         absolute_b = ((instr.value >> 30) & 1) != 0; |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HMUL2_C: |  | ||||||
|         negate_b = ((instr.value >> 31) & 1) != 0; |  | ||||||
|         absolute_a = ((instr.value >> 44) & 1) != 0; |  | ||||||
|         absolute_b = ((instr.value >> 54) & 1) != 0; |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); |  | ||||||
|     op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); |  | ||||||
| 
 |  | ||||||
|     auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::HADD2_C: |  | ||||||
|         case OpCode::Id::HMUL2_C: |  | ||||||
|             return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         case OpCode::Id::HADD2_R: |  | ||||||
|         case OpCode::Id::HMUL2_R: |  | ||||||
|             return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return {HalfType::F32, Immediate(0)}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
|     op_b = UnpackHalfFloat(op_b, type_b); |  | ||||||
|     op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); |  | ||||||
| 
 |  | ||||||
|     Node value = [this, opcode, op_a, op_b = op_b] { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::HADD2_C: |  | ||||||
|         case OpCode::Id::HADD2_R: |  | ||||||
|             return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |  | ||||||
|         case OpCode::Id::HMUL2_C: |  | ||||||
|         case OpCode::Id::HMUL2_R: |  | ||||||
|             return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
|     value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); |  | ||||||
|     value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); |  | ||||||
| 
 |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,54 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { |  | ||||||
|         if (instr.alu_half_imm.ftz == 0) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|         } |  | ||||||
|     } else { |  | ||||||
|         if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { |  | ||||||
|             LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |  | ||||||
|     op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); |  | ||||||
| 
 |  | ||||||
|     const Node op_b = UnpackHalfImmediate(instr, true); |  | ||||||
| 
 |  | ||||||
|     Node value = [&]() { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::HADD2_IMM: |  | ||||||
|             return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |  | ||||||
|         case OpCode::Id::HMUL2_IMM: |  | ||||||
|             return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); |  | ||||||
|     value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,53 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::MOV32_IMM: { |  | ||||||
|         SetRegister(bb, instr.gpr0, GetImmediate32(instr)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FMUL32_IMM: { |  | ||||||
|         Node value = |  | ||||||
|             Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); |  | ||||||
|         value = GetSaturatedFloat(value, instr.fmul32.saturate); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FADD32I: { |  | ||||||
|         const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, |  | ||||||
|                                                 instr.fadd32i.negate_a); |  | ||||||
|         const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, |  | ||||||
|                                                 instr.fadd32i.negate_b); |  | ||||||
| 
 |  | ||||||
|         const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", |  | ||||||
|                           opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,375 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::IAdd3Height; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     Node op_b = [&]() { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::IADD_C: |  | ||||||
|     case OpCode::Id::IADD_R: |  | ||||||
|     case OpCode::Id::IADD_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); |  | ||||||
| 
 |  | ||||||
|         op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); |  | ||||||
|         op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); |  | ||||||
| 
 |  | ||||||
|         Node value = Operation(OperationCode::UAdd, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|         if (instr.iadd.x) { |  | ||||||
|             Node carry = GetInternalFlag(InternalFlag::Carry); |  | ||||||
|             Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); |  | ||||||
|             value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (instr.generates_cc) { |  | ||||||
|             const Node i0 = Immediate(0); |  | ||||||
| 
 |  | ||||||
|             Node zero = Operation(OperationCode::LogicalIEqual, value, i0); |  | ||||||
|             Node sign = Operation(OperationCode::LogicalILessThan, value, i0); |  | ||||||
|             Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|             Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); |  | ||||||
|             Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); |  | ||||||
|             Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); |  | ||||||
|             Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); |  | ||||||
| 
 |  | ||||||
|             SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); |  | ||||||
|             SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); |  | ||||||
|             SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); |  | ||||||
|             SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); |  | ||||||
|         } |  | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::IADD3_C: |  | ||||||
|     case OpCode::Id::IADD3_R: |  | ||||||
|     case OpCode::Id::IADD3_IMM: { |  | ||||||
|         Node op_c = GetRegister(instr.gpr39); |  | ||||||
| 
 |  | ||||||
|         const auto ApplyHeight = [&](IAdd3Height height, Node value) { |  | ||||||
|             switch (height) { |  | ||||||
|             case IAdd3Height::None: |  | ||||||
|                 return value; |  | ||||||
|             case IAdd3Height::LowerHalfWord: |  | ||||||
|                 return BitfieldExtract(value, 0, 16); |  | ||||||
|             case IAdd3Height::UpperHalfWord: |  | ||||||
|                 return BitfieldExtract(value, 16, 16); |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         if (opcode->get().GetId() == OpCode::Id::IADD3_R) { |  | ||||||
|             op_a = ApplyHeight(instr.iadd3.height_a, op_a); |  | ||||||
|             op_b = ApplyHeight(instr.iadd3.height_b, op_b); |  | ||||||
|             op_c = ApplyHeight(instr.iadd3.height_c, op_c); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); |  | ||||||
|         op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); |  | ||||||
|         op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); |  | ||||||
| 
 |  | ||||||
|         const Node value = [&] { |  | ||||||
|             Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); |  | ||||||
|             if (opcode->get().GetId() != OpCode::Id::IADD3_R) { |  | ||||||
|                 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); |  | ||||||
|             } |  | ||||||
|             const Node shifted = [&] { |  | ||||||
|                 switch (instr.iadd3.mode) { |  | ||||||
|                 case Tegra::Shader::IAdd3Mode::RightShift: |  | ||||||
|                     // TODO(tech4me): According to
 |  | ||||||
|                     // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
 |  | ||||||
|                     // The addition between op_a and op_b should be done in uint33, more
 |  | ||||||
|                     // investigation required
 |  | ||||||
|                     return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, |  | ||||||
|                                      Immediate(16)); |  | ||||||
|                 case Tegra::Shader::IAdd3Mode::LeftShift: |  | ||||||
|                     return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, |  | ||||||
|                                      Immediate(16)); |  | ||||||
|                 default: |  | ||||||
|                     return add_ab; |  | ||||||
|                 } |  | ||||||
|             }(); |  | ||||||
|             return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ISCADD_C: |  | ||||||
|     case OpCode::Id::ISCADD_R: |  | ||||||
|     case OpCode::Id::ISCADD_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                              "Condition codes generation in ISCADD is not implemented"); |  | ||||||
| 
 |  | ||||||
|         op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); |  | ||||||
|         op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); |  | ||||||
| 
 |  | ||||||
|         const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); |  | ||||||
|         const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); |  | ||||||
|         const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::POPC_C: |  | ||||||
|     case OpCode::Id::POPC_R: |  | ||||||
|     case OpCode::Id::POPC_IMM: { |  | ||||||
|         if (instr.popc.invert) { |  | ||||||
|             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); |  | ||||||
|         } |  | ||||||
|         const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FLO_R: |  | ||||||
|     case OpCode::Id::FLO_C: |  | ||||||
|     case OpCode::Id::FLO_IMM: { |  | ||||||
|         Node value; |  | ||||||
|         if (instr.flo.invert) { |  | ||||||
|             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); |  | ||||||
|         } |  | ||||||
|         if (instr.flo.is_signed) { |  | ||||||
|             value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); |  | ||||||
|         } else { |  | ||||||
|             value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); |  | ||||||
|         } |  | ||||||
|         if (instr.flo.sh) { |  | ||||||
|             value = |  | ||||||
|                 Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); |  | ||||||
|         } |  | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SEL_C: |  | ||||||
|     case OpCode::Id::SEL_R: |  | ||||||
|     case OpCode::Id::SEL_IMM: { |  | ||||||
|         const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); |  | ||||||
|         const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ICMP_CR: |  | ||||||
|     case OpCode::Id::ICMP_R: |  | ||||||
|     case OpCode::Id::ICMP_RC: |  | ||||||
|     case OpCode::Id::ICMP_IMM: { |  | ||||||
|         const Node zero = Immediate(0); |  | ||||||
| 
 |  | ||||||
|         const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::ICMP_CR: |  | ||||||
|                 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |  | ||||||
|                         GetRegister(instr.gpr39)}; |  | ||||||
|             case OpCode::Id::ICMP_R: |  | ||||||
|                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; |  | ||||||
|             case OpCode::Id::ICMP_RC: |  | ||||||
|                 return {GetRegister(instr.gpr39), |  | ||||||
|                         GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|             case OpCode::Id::ICMP_IMM: |  | ||||||
|                 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return {zero, zero}; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         const Node op_lhs = GetRegister(instr.gpr8); |  | ||||||
|         const Node comparison = |  | ||||||
|             GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); |  | ||||||
|         SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LOP_C: |  | ||||||
|     case OpCode::Id::LOP_R: |  | ||||||
|     case OpCode::Id::LOP_IMM: { |  | ||||||
|         if (instr.alu.lop.invert_a) |  | ||||||
|             op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); |  | ||||||
|         if (instr.alu.lop.invert_b) |  | ||||||
|             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); |  | ||||||
| 
 |  | ||||||
|         WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, |  | ||||||
|                             instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, |  | ||||||
|                             instr.generates_cc); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LOP3_C: |  | ||||||
|     case OpCode::Id::LOP3_R: |  | ||||||
|     case OpCode::Id::LOP3_IMM: { |  | ||||||
|         const Node op_c = GetRegister(instr.gpr39); |  | ||||||
|         const Node lut = [&]() { |  | ||||||
|             if (opcode->get().GetId() == OpCode::Id::LOP3_R) { |  | ||||||
|                 return Immediate(instr.alu.lop3.GetImmLut28()); |  | ||||||
|             } else { |  | ||||||
|                 return Immediate(instr.alu.lop3.GetImmLut48()); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::IMNMX_C: |  | ||||||
|     case OpCode::Id::IMNMX_R: |  | ||||||
|     case OpCode::Id::IMNMX_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); |  | ||||||
| 
 |  | ||||||
|         const bool is_signed = instr.imnmx.is_signed; |  | ||||||
| 
 |  | ||||||
|         const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); |  | ||||||
|         const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); |  | ||||||
|         const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); |  | ||||||
|         const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LEA_R2: |  | ||||||
|     case OpCode::Id::LEA_R1: |  | ||||||
|     case OpCode::Id::LEA_IMM: |  | ||||||
|     case OpCode::Id::LEA_RZ: |  | ||||||
|     case OpCode::Id::LEA_HI: { |  | ||||||
|         auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::LEA_R2: { |  | ||||||
|                 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; |  | ||||||
|             } |  | ||||||
|             case OpCode::Id::LEA_R1: { |  | ||||||
|                 const bool neg = instr.lea.r1.neg != 0; |  | ||||||
|                 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |  | ||||||
|                         GetRegister(instr.gpr20), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; |  | ||||||
|             } |  | ||||||
|             case OpCode::Id::LEA_IMM: { |  | ||||||
|                 const bool neg = instr.lea.imm.neg != 0; |  | ||||||
|                 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.imm.entry_a)), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; |  | ||||||
|             } |  | ||||||
|             case OpCode::Id::LEA_RZ: { |  | ||||||
|                 const bool neg = instr.lea.rz.neg != 0; |  | ||||||
|                 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), |  | ||||||
|                         GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; |  | ||||||
|             } |  | ||||||
|             case OpCode::Id::LEA_HI: |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); |  | ||||||
| 
 |  | ||||||
|                 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), |  | ||||||
|                         Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), |  | ||||||
|                              "Unhandled LEA Predicate"); |  | ||||||
| 
 |  | ||||||
|         Node value = |  | ||||||
|             Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); |  | ||||||
|         value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); |  | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |  | ||||||
| 
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, |  | ||||||
|                                     Node imm_lut, bool sets_cc) { |  | ||||||
|     const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { |  | ||||||
|         Node value = Immediate(0); |  | ||||||
|         const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); |  | ||||||
|         if (imm.GetValue() & 0x01) { |  | ||||||
|             const Node a = Operation(OperationCode::IBitwiseNot, na); |  | ||||||
|             const Node b = Operation(OperationCode::IBitwiseNot, nb); |  | ||||||
|             const Node c = Operation(OperationCode::IBitwiseNot, nc); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x02) { |  | ||||||
|             const Node a = Operation(OperationCode::IBitwiseNot, na); |  | ||||||
|             const Node b = Operation(OperationCode::IBitwiseNot, nb); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x04) { |  | ||||||
|             const Node a = Operation(OperationCode::IBitwiseNot, na); |  | ||||||
|             const Node c = Operation(OperationCode::IBitwiseNot, nc); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x08) { |  | ||||||
|             const Node a = Operation(OperationCode::IBitwiseNot, na); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x10) { |  | ||||||
|             const Node b = Operation(OperationCode::IBitwiseNot, nb); |  | ||||||
|             const Node c = Operation(OperationCode::IBitwiseNot, nc); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x20) { |  | ||||||
|             const Node b = Operation(OperationCode::IBitwiseNot, nb); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x40) { |  | ||||||
|             const Node c = Operation(OperationCode::IBitwiseNot, nc); |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         if (imm.GetValue() & 0x80) { |  | ||||||
|             Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); |  | ||||||
|             r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); |  | ||||||
|             value = Operation(OperationCode::IBitwiseOr, value, r); |  | ||||||
|         } |  | ||||||
|         return value; |  | ||||||
|     }(op_a, op_b, op_c, imm_lut); |  | ||||||
| 
 |  | ||||||
|     SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); |  | ||||||
|     SetRegister(bb, dest, lop3_fast); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,99 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::LogicOperation; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| using Tegra::Shader::PredicateResultMode; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::IADD32I: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); |  | ||||||
| 
 |  | ||||||
|         op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); |  | ||||||
| 
 |  | ||||||
|         Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); |  | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LOP32I: { |  | ||||||
|         if (instr.alu.lop32i.invert_a) { |  | ||||||
|             op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (instr.alu.lop32i.invert_b) { |  | ||||||
|             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), |  | ||||||
|                             std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, |  | ||||||
|                             instr.op_32.generates_cc != 0); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", |  | ||||||
|                           opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, |  | ||||||
|                                    Node op_b, PredicateResultMode predicate_mode, Pred predicate, |  | ||||||
|                                    bool sets_cc) { |  | ||||||
|     Node result = [&] { |  | ||||||
|         switch (logic_op) { |  | ||||||
|         case LogicOperation::And: |  | ||||||
|             return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); |  | ||||||
|         case LogicOperation::Or: |  | ||||||
|             return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); |  | ||||||
|         case LogicOperation::Xor: |  | ||||||
|             return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); |  | ||||||
|         case LogicOperation::PassB: |  | ||||||
|             return op_b; |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     SetInternalFlagsFromInteger(bb, result, sets_cc); |  | ||||||
|     SetRegister(bb, dest, result); |  | ||||||
| 
 |  | ||||||
|     // Write the predicate value depending on the predicate mode.
 |  | ||||||
|     switch (predicate_mode) { |  | ||||||
|     case PredicateResultMode::None: |  | ||||||
|         // Do nothing.
 |  | ||||||
|         return; |  | ||||||
|     case PredicateResultMode::NotZero: { |  | ||||||
|         // Set the predicate to true if the result is not zero.
 |  | ||||||
|         Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); |  | ||||||
|         SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,77 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     Node op_b = [&] { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::BFE_R: |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         case OpCode::Id::BFE_C: |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         case OpCode::Id::BFE_IMM: |  | ||||||
|             return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); |  | ||||||
| 
 |  | ||||||
|     const bool is_signed = instr.bfe.is_signed; |  | ||||||
| 
 |  | ||||||
|     // using reverse parallel method in
 |  | ||||||
|     // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
 |  | ||||||
|     // note for later if possible to implement faster method.
 |  | ||||||
|     if (instr.bfe.brev) { |  | ||||||
|         const auto swap = [&](u32 s, u32 mask) { |  | ||||||
|             Node v1 = |  | ||||||
|                 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); |  | ||||||
|             if (mask != 0) { |  | ||||||
|                 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), |  | ||||||
|                                      Immediate(mask)); |  | ||||||
|             } |  | ||||||
|             Node v2 = op_a; |  | ||||||
|             if (mask != 0) { |  | ||||||
|                 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), |  | ||||||
|                                      Immediate(mask)); |  | ||||||
|             } |  | ||||||
|             v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), |  | ||||||
|                                  Immediate(s)); |  | ||||||
|             return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), |  | ||||||
|                                    std::move(v2)); |  | ||||||
|         }; |  | ||||||
|         op_a = swap(1, 0x55555555U); |  | ||||||
|         op_a = swap(2, 0x33333333U); |  | ||||||
|         op_a = swap(4, 0x0F0F0F0FU); |  | ||||||
|         op_a = swap(8, 0x00FF00FFU); |  | ||||||
|         op_a = swap(16, 0); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, |  | ||||||
|                                         Immediate(0), Immediate(8)); |  | ||||||
|     const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, |  | ||||||
|                                       Immediate(8), Immediate(8)); |  | ||||||
|     auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); |  | ||||||
|     SetRegister(bb, instr.gpr0, std::move(result)); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,45 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::BFI_RC: |  | ||||||
|             return {GetRegister(instr.gpr39), |  | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         case OpCode::Id::BFI_IMM_R: |  | ||||||
|             return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return {Immediate(0), Immediate(0)}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
|     const Node insert = GetRegister(instr.gpr8); |  | ||||||
|     const Node offset = BitfieldExtract(packed_shift, 0, 8); |  | ||||||
|     const Node bits = BitfieldExtract(packed_shift, 8, 8); |  | ||||||
| 
 |  | ||||||
|     const Node value = |  | ||||||
|         Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); |  | ||||||
| 
 |  | ||||||
|     SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,321 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <limits> |  | ||||||
| #include <optional> |  | ||||||
| #include <utility> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| constexpr OperationCode GetFloatSelector(u64 selector) { |  | ||||||
|     return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| constexpr u32 SizeInBits(Register::Size size) { |  | ||||||
|     switch (size) { |  | ||||||
|     case Register::Size::Byte: |  | ||||||
|         return 8; |  | ||||||
|     case Register::Size::Short: |  | ||||||
|         return 16; |  | ||||||
|     case Register::Size::Word: |  | ||||||
|         return 32; |  | ||||||
|     case Register::Size::Long: |  | ||||||
|         return 64; |  | ||||||
|     } |  | ||||||
|     return 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, |  | ||||||
|                                                                    Register::Size dst_size, |  | ||||||
|                                                                    bool src_signed, |  | ||||||
|                                                                    bool dst_signed) { |  | ||||||
|     const u32 dst_bits = SizeInBits(dst_size); |  | ||||||
|     if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { |  | ||||||
|         if (src_signed == dst_signed) { |  | ||||||
|             return std::nullopt; |  | ||||||
|         } |  | ||||||
|         return std::make_pair(0, std::numeric_limits<s32>::max()); |  | ||||||
|     } |  | ||||||
|     if (dst_signed) { |  | ||||||
|         // Signed destination, clamp to [-128, 127] for instance
 |  | ||||||
|         return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); |  | ||||||
|     } else { |  | ||||||
|         // Unsigned destination
 |  | ||||||
|         if (dst_bits == 32) { |  | ||||||
|             // Avoid shifting by 32, that is undefined behavior
 |  | ||||||
|             return std::make_pair(0, s32(std::numeric_limits<u32>::max())); |  | ||||||
|         } |  | ||||||
|         return std::make_pair(0, (1 << dst_bits) - 1); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::I2I_R: |  | ||||||
|     case OpCode::Id::I2I_C: |  | ||||||
|     case OpCode::Id::I2I_IMM: { |  | ||||||
|         const bool src_signed = instr.conversion.is_input_signed; |  | ||||||
|         const bool dst_signed = instr.conversion.is_output_signed; |  | ||||||
|         const Register::Size src_size = instr.conversion.src_size; |  | ||||||
|         const Register::Size dst_size = instr.conversion.dst_size; |  | ||||||
|         const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); |  | ||||||
| 
 |  | ||||||
|         Node value = [this, instr, opcode] { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::I2I_R: |  | ||||||
|                 return GetRegister(instr.gpr20); |  | ||||||
|             case OpCode::Id::I2I_C: |  | ||||||
|                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|             case OpCode::Id::I2I_IMM: |  | ||||||
|                 return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         // Ensure the source selector is valid
 |  | ||||||
|         switch (instr.conversion.src_size) { |  | ||||||
|         case Register::Size::Byte: |  | ||||||
|             break; |  | ||||||
|         case Register::Size::Short: |  | ||||||
|             ASSERT(selector == 0 || selector == 2); |  | ||||||
|             break; |  | ||||||
|         default: |  | ||||||
|             ASSERT(selector == 0); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (src_size != Register::Size::Word || selector != 0) { |  | ||||||
|             value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), |  | ||||||
|                                     Immediate(selector * 8), Immediate(SizeInBits(src_size))); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, |  | ||||||
|                                         instr.conversion.negate_a, src_signed); |  | ||||||
| 
 |  | ||||||
|         if (instr.alu.saturate_d) { |  | ||||||
|             if (src_signed && !dst_signed) { |  | ||||||
|                 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, |  | ||||||
|                                              Immediate(1 << (SizeInBits(src_size) - 1))); |  | ||||||
|                 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), |  | ||||||
|                                   std::move(value)); |  | ||||||
| 
 |  | ||||||
|                 // Simplify generated expressions, this can be removed without semantic impact
 |  | ||||||
|                 SetTemporary(bb, 0, std::move(value)); |  | ||||||
|                 value = GetTemporary(0); |  | ||||||
| 
 |  | ||||||
|                 if (dst_size != Register::Size::Word) { |  | ||||||
|                     const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); |  | ||||||
|                     Node is_large = |  | ||||||
|                         Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); |  | ||||||
|                     value = Operation(OperationCode::Select, std::move(is_large), limit, |  | ||||||
|                                       std::move(value)); |  | ||||||
|                 } |  | ||||||
|             } else if (const std::optional bounds = |  | ||||||
|                            IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { |  | ||||||
|                 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), |  | ||||||
|                                         Immediate(bounds->first)); |  | ||||||
|                 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), |  | ||||||
|                                         Immediate(bounds->second)); |  | ||||||
|             } |  | ||||||
|         } else if (dst_size != Register::Size::Word) { |  | ||||||
|             // No saturation, we only have to mask the result
 |  | ||||||
|             Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); |  | ||||||
|             value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::I2F_R: |  | ||||||
|     case OpCode::Id::I2F_C: |  | ||||||
|     case OpCode::Id::I2F_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                              "Condition codes generation in I2F is not implemented"); |  | ||||||
| 
 |  | ||||||
|         Node value = [&] { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::I2F_R: |  | ||||||
|                 return GetRegister(instr.gpr20); |  | ||||||
|             case OpCode::Id::I2F_C: |  | ||||||
|                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|             case OpCode::Id::I2F_IMM: |  | ||||||
|                 return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         const bool input_signed = instr.conversion.is_input_signed; |  | ||||||
| 
 |  | ||||||
|         if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { |  | ||||||
|             ASSERT(instr.conversion.src_size == Register::Size::Byte || |  | ||||||
|                    instr.conversion.src_size == Register::Size::Short); |  | ||||||
|             if (instr.conversion.src_size == Register::Size::Short) { |  | ||||||
|                 ASSERT(offset == 0 || offset == 2); |  | ||||||
|             } |  | ||||||
|             value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, |  | ||||||
|                                     std::move(value), Immediate(offset * 8)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |  | ||||||
|         value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); |  | ||||||
|         value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); |  | ||||||
|         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
| 
 |  | ||||||
|         if (instr.conversion.dst_size == Register::Size::Short) { |  | ||||||
|             value = Operation(OperationCode::HCastFloat, PRECISE, value); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::F2F_R: |  | ||||||
|     case OpCode::Id::F2F_C: |  | ||||||
|     case OpCode::Id::F2F_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); |  | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                              "Condition codes generation in F2F is not implemented"); |  | ||||||
| 
 |  | ||||||
|         Node value = [&]() { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::F2F_R: |  | ||||||
|                 return GetRegister(instr.gpr20); |  | ||||||
|             case OpCode::Id::F2F_C: |  | ||||||
|                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|             case OpCode::Id::F2F_IMM: |  | ||||||
|                 return GetImmediate19(instr); |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         if (instr.conversion.src_size == Register::Size::Short) { |  | ||||||
|             value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, |  | ||||||
|                               std::move(value)); |  | ||||||
|         } else { |  | ||||||
|             ASSERT(instr.conversion.float_src.selector == 0); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |  | ||||||
| 
 |  | ||||||
|         value = [&] { |  | ||||||
|             if (instr.conversion.src_size != instr.conversion.dst_size) { |  | ||||||
|                 // Rounding operations only matter when the source and destination conversion size
 |  | ||||||
|                 // is the same.
 |  | ||||||
|                 return value; |  | ||||||
|             } |  | ||||||
|             switch (instr.conversion.f2f.GetRoundingMode()) { |  | ||||||
|             case Tegra::Shader::F2fRoundingOp::None: |  | ||||||
|                 return value; |  | ||||||
|             case Tegra::Shader::F2fRoundingOp::Round: |  | ||||||
|                 return Operation(OperationCode::FRoundEven, value); |  | ||||||
|             case Tegra::Shader::F2fRoundingOp::Floor: |  | ||||||
|                 return Operation(OperationCode::FFloor, value); |  | ||||||
|             case Tegra::Shader::F2fRoundingOp::Ceil: |  | ||||||
|                 return Operation(OperationCode::FCeil, value); |  | ||||||
|             case Tegra::Shader::F2fRoundingOp::Trunc: |  | ||||||
|                 return Operation(OperationCode::FTrunc, value); |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |  | ||||||
|                                   instr.conversion.f2f.rounding.Value()); |  | ||||||
|                 return value; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
| 
 |  | ||||||
|         if (instr.conversion.dst_size == Register::Size::Short) { |  | ||||||
|             value = Operation(OperationCode::HCastFloat, PRECISE, value); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::F2I_R: |  | ||||||
|     case OpCode::Id::F2I_C: |  | ||||||
|     case OpCode::Id::F2I_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                              "Condition codes generation in F2I is not implemented"); |  | ||||||
|         Node value = [&]() { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::F2I_R: |  | ||||||
|                 return GetRegister(instr.gpr20); |  | ||||||
|             case OpCode::Id::F2I_C: |  | ||||||
|                 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|             case OpCode::Id::F2I_IMM: |  | ||||||
|                 return GetImmediate19(instr); |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         if (instr.conversion.src_size == Register::Size::Short) { |  | ||||||
|             value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, |  | ||||||
|                               std::move(value)); |  | ||||||
|         } else { |  | ||||||
|             ASSERT(instr.conversion.float_src.selector == 0); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |  | ||||||
| 
 |  | ||||||
|         value = [&]() { |  | ||||||
|             switch (instr.conversion.f2i.rounding) { |  | ||||||
|             case Tegra::Shader::F2iRoundingOp::RoundEven: |  | ||||||
|                 return Operation(OperationCode::FRoundEven, PRECISE, value); |  | ||||||
|             case Tegra::Shader::F2iRoundingOp::Floor: |  | ||||||
|                 return Operation(OperationCode::FFloor, PRECISE, value); |  | ||||||
|             case Tegra::Shader::F2iRoundingOp::Ceil: |  | ||||||
|                 return Operation(OperationCode::FCeil, PRECISE, value); |  | ||||||
|             case Tegra::Shader::F2iRoundingOp::Trunc: |  | ||||||
|                 return Operation(OperationCode::FTrunc, PRECISE, value); |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", |  | ||||||
|                                   instr.conversion.f2i.rounding.Value()); |  | ||||||
|                 return Immediate(0); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         const bool is_signed = instr.conversion.is_output_signed; |  | ||||||
|         value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); |  | ||||||
|         value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,62 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); |  | ||||||
|     if (instr.ffma.tab5980_0 != 1) { |  | ||||||
|         LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); |  | ||||||
|     } |  | ||||||
|     if (instr.ffma.tab5980_1 != 0) { |  | ||||||
|         LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node op_a = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|     auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::FFMA_CR: { |  | ||||||
|             return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |  | ||||||
|                     GetRegister(instr.gpr39)}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::FFMA_RR: |  | ||||||
|             return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; |  | ||||||
|         case OpCode::Id::FFMA_RC: { |  | ||||||
|             return {GetRegister(instr.gpr39), |  | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         } |  | ||||||
|         case OpCode::Id::FFMA_IMM: |  | ||||||
|             return {GetImmediate19(instr), GetRegister(instr.gpr39)}; |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); |  | ||||||
|             return {Immediate(0), Immediate(0)}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); |  | ||||||
|     op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); |  | ||||||
| 
 |  | ||||||
|     Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); |  | ||||||
|     value = GetSaturatedFloat(value, instr.alu.saturate_d); |  | ||||||
| 
 |  | ||||||
|     SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,58 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
| 
 |  | ||||||
|     const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, |  | ||||||
|                                             instr.fset.neg_a != 0); |  | ||||||
| 
 |  | ||||||
|     Node op_b = [&]() { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return GetImmediate19(instr); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); |  | ||||||
| 
 |  | ||||||
|     // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
 |  | ||||||
|     // condition is true, and to 0 otherwise.
 |  | ||||||
|     const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.fset.op); |  | ||||||
|     const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|     const Node predicate = Operation(combiner, first_pred, second_pred); |  | ||||||
| 
 |  | ||||||
|     const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); |  | ||||||
|     const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); |  | ||||||
|     const Node value = |  | ||||||
|         Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); |  | ||||||
| 
 |  | ||||||
|     if (instr.fset.bf) { |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|     } else { |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|     } |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,57 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, |  | ||||||
|                                       instr.fsetp.neg_a != 0); |  | ||||||
|     Node op_b = [&]() { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return GetImmediate19(instr); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
|     op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); |  | ||||||
| 
 |  | ||||||
|     // We can't use the constant predicate as destination.
 |  | ||||||
|     ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |  | ||||||
| 
 |  | ||||||
|     const Node predicate = |  | ||||||
|         GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); |  | ||||||
|     const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); |  | ||||||
|     const Node value = Operation(combiner, predicate, second_pred); |  | ||||||
| 
 |  | ||||||
|     // Set the primary predicate to the result of Predicate OP SecondPredicate
 |  | ||||||
|     SetPredicate(bb, instr.fsetp.pred3, value); |  | ||||||
| 
 |  | ||||||
|     if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|         // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
 |  | ||||||
|         // if enabled
 |  | ||||||
|         const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); |  | ||||||
|         const Node second_value = Operation(combiner, negated_pred, second_pred); |  | ||||||
|         SetPredicate(bb, instr.fsetp.pred0, second_value); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,115 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::PredCondition; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     PredCondition cond{}; |  | ||||||
|     bool bf = false; |  | ||||||
|     bool ftz = false; |  | ||||||
|     bool neg_a = false; |  | ||||||
|     bool abs_a = false; |  | ||||||
|     bool neg_b = false; |  | ||||||
|     bool abs_b = false; |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::HSET2_C: |  | ||||||
|     case OpCode::Id::HSET2_IMM: |  | ||||||
|         cond = instr.hsetp2.cbuf_and_imm.cond; |  | ||||||
|         bf = instr.Bit(53); |  | ||||||
|         ftz = instr.Bit(54); |  | ||||||
|         neg_a = instr.Bit(43); |  | ||||||
|         abs_a = instr.Bit(44); |  | ||||||
|         neg_b = instr.Bit(56); |  | ||||||
|         abs_b = instr.Bit(54); |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HSET2_R: |  | ||||||
|         cond = instr.hsetp2.reg.cond; |  | ||||||
|         bf = instr.Bit(49); |  | ||||||
|         ftz = instr.Bit(50); |  | ||||||
|         neg_a = instr.Bit(43); |  | ||||||
|         abs_a = instr.Bit(44); |  | ||||||
|         neg_b = instr.Bit(31); |  | ||||||
|         abs_b = instr.Bit(30); |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node op_b = [this, instr, opcode] { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::HSET2_C: |  | ||||||
|             // Inform as unimplemented as this is not tested.
 |  | ||||||
|             UNIMPLEMENTED_MSG("HSET2_C is not implemented"); |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         case OpCode::Id::HSET2_R: |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         case OpCode::Id::HSET2_IMM: |  | ||||||
|             return UnpackHalfImmediate(instr, true); |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Node{}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     if (!ftz) { |  | ||||||
|         LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); |  | ||||||
|     op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::HSET2_R: |  | ||||||
|         op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::HSET2_C: |  | ||||||
|         op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); |  | ||||||
| 
 |  | ||||||
|     Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); |  | ||||||
| 
 |  | ||||||
|     // HSET2 operates on each half float in the pack.
 |  | ||||||
|     std::array<Node, 2> values; |  | ||||||
|     for (u32 i = 0; i < 2; ++i) { |  | ||||||
|         const u32 raw_value = bf ? 0x3c00 : 0xffff; |  | ||||||
|         Node true_value = Immediate(raw_value << (i * 16)); |  | ||||||
|         Node false_value = Immediate(0); |  | ||||||
| 
 |  | ||||||
|         Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); |  | ||||||
|         Node predicate = Operation(combiner, comparison, second_pred); |  | ||||||
|         values[i] = |  | ||||||
|             Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); |  | ||||||
|     SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,80 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     if (instr.hsetp2.ftz != 0) { |  | ||||||
|         LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |  | ||||||
|     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |  | ||||||
| 
 |  | ||||||
|     Tegra::Shader::PredCondition cond{}; |  | ||||||
|     bool h_and{}; |  | ||||||
|     Node op_b{}; |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::HSETP2_C: |  | ||||||
|         cond = instr.hsetp2.cbuf_and_imm.cond; |  | ||||||
|         h_and = instr.hsetp2.cbuf_and_imm.h_and; |  | ||||||
|         op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |  | ||||||
|                                     instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); |  | ||||||
|         // F32 is hardcoded in hardware
 |  | ||||||
|         op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HSETP2_IMM: |  | ||||||
|         cond = instr.hsetp2.cbuf_and_imm.cond; |  | ||||||
|         h_and = instr.hsetp2.cbuf_and_imm.h_and; |  | ||||||
|         op_b = UnpackHalfImmediate(instr, true); |  | ||||||
|         break; |  | ||||||
|     case OpCode::Id::HSETP2_R: |  | ||||||
|         cond = instr.hsetp2.reg.cond; |  | ||||||
|         h_and = instr.hsetp2.reg.h_and; |  | ||||||
|         op_b = |  | ||||||
|             GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), |  | ||||||
|                                  instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         op_b = Immediate(0); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); |  | ||||||
|     const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); |  | ||||||
| 
 |  | ||||||
|     const auto Write = [&](u64 dest, Node src) { |  | ||||||
|         SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); |  | ||||||
|     const u64 first = instr.hsetp2.pred3; |  | ||||||
|     const u64 second = instr.hsetp2.pred0; |  | ||||||
|     if (h_and) { |  | ||||||
|         Node joined = Operation(OperationCode::LogicalAnd2, comparison); |  | ||||||
|         Write(first, joined); |  | ||||||
|         Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); |  | ||||||
|     } else { |  | ||||||
|         Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); |  | ||||||
|         Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,73 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <tuple> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::HalfPrecision; |  | ||||||
| using Tegra::Shader::HalfType; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { |  | ||||||
|         DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); |  | ||||||
|     } else { |  | ||||||
|         DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     constexpr auto identity = HalfType::H0_H1; |  | ||||||
|     bool neg_b{}, neg_c{}; |  | ||||||
|     auto [saturate, type_b, op_b, type_c, |  | ||||||
|           op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::HFMA2_CR: |  | ||||||
|             neg_b = instr.hfma2.negate_b; |  | ||||||
|             neg_c = instr.hfma2.negate_c; |  | ||||||
|             return {instr.hfma2.saturate, HalfType::F32, |  | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |  | ||||||
|                     instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; |  | ||||||
|         case OpCode::Id::HFMA2_RC: |  | ||||||
|             neg_b = instr.hfma2.negate_b; |  | ||||||
|             neg_c = instr.hfma2.negate_c; |  | ||||||
|             return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), |  | ||||||
|                     HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         case OpCode::Id::HFMA2_RR: |  | ||||||
|             neg_b = instr.hfma2.rr.negate_b; |  | ||||||
|             neg_c = instr.hfma2.rr.negate_c; |  | ||||||
|             return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), |  | ||||||
|                     instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; |  | ||||||
|         case OpCode::Id::HFMA2_IMM_R: |  | ||||||
|             neg_c = instr.hfma2.negate_c; |  | ||||||
|             return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), |  | ||||||
|                     instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; |  | ||||||
|         default: |  | ||||||
|             return {false, identity, Immediate(0), identity, Immediate(0)}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); |  | ||||||
|     op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); |  | ||||||
|     op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); |  | ||||||
| 
 |  | ||||||
|     Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); |  | ||||||
|     value = GetSaturatedHalfFloat(value, saturate); |  | ||||||
|     value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); |  | ||||||
| 
 |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,536 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <vector> |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| #include "video_core/textures/texture.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::PredCondition; |  | ||||||
| using Tegra::Shader::StoreType; |  | ||||||
| using Tegra::Texture::ComponentType; |  | ||||||
| using Tegra::Texture::TextureFormat; |  | ||||||
| using Tegra::Texture::TICEntry; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, |  | ||||||
|                                std::size_t component) { |  | ||||||
|     const TextureFormat format{descriptor.format}; |  | ||||||
|     switch (format) { |  | ||||||
|     case TextureFormat::R16G16B16A16: |  | ||||||
|     case TextureFormat::R32G32B32A32: |  | ||||||
|     case TextureFormat::R32G32B32: |  | ||||||
|     case TextureFormat::R32G32: |  | ||||||
|     case TextureFormat::R16G16: |  | ||||||
|     case TextureFormat::R32: |  | ||||||
|     case TextureFormat::R16: |  | ||||||
|     case TextureFormat::R8: |  | ||||||
|     case TextureFormat::R1: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return descriptor.b_type; |  | ||||||
|         } |  | ||||||
|         if (component == 3) { |  | ||||||
|             return descriptor.a_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case TextureFormat::A8R8G8B8: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.a_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         if (component == 3) { |  | ||||||
|             return descriptor.b_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case TextureFormat::A2B10G10R10: |  | ||||||
|     case TextureFormat::A4B4G4R4: |  | ||||||
|     case TextureFormat::A5B5G5R1: |  | ||||||
|     case TextureFormat::A1B5G5R5: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.a_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.b_type; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         if (component == 3) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case TextureFormat::R32_B24G8: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.b_type; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case TextureFormat::B5G6R5: |  | ||||||
|     case TextureFormat::B6G5R5: |  | ||||||
|     case TextureFormat::B10G11R11: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.b_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case TextureFormat::R24G8: |  | ||||||
|     case TextureFormat::R8G24: |  | ||||||
|     case TextureFormat::R8G8: |  | ||||||
|     case TextureFormat::G4R4: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return descriptor.g_type; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return descriptor.r_type; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     UNIMPLEMENTED_MSG("Texture format not implemented={}", format); |  | ||||||
|     return ComponentType::FLOAT; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
// Reports whether `component` (0 = R, 1 = G, 2 = B, 3 = A) is selected by the 4-bit
// `component_mask`.
//
// The mask is looked up in a 16-entry table and the requested bit is tested on the result.
// Both steps are range checked: std::array::at throws std::out_of_range for a mask of 16 or
// more, and std::bitset::test throws std::out_of_range for a component of 4 or more.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 red = 0b0001;
    constexpr u8 green = 0b0010;
    constexpr u8 blue = 0b0100;
    constexpr u8 alpha = 0b1000;
    // Entry i holds the set of channels enabled by mask value i.
    constexpr std::array<u8, 16> channels_by_mask = {
        0,
        red,
        green,
        red | green,
        blue,
        red | blue,
        green | blue,
        red | green | blue,
        alpha,
        red | alpha,
        green | alpha,
        red | green | alpha,
        blue | alpha,
        red | blue | alpha,
        green | blue | alpha,
        red | green | blue | alpha,
    };
    const std::bitset<4> enabled_channels{channels_by_mask.at(component_mask)};
    return enabled_channels.test(component);
}
| 
 |  | ||||||
| u32 GetComponentSize(TextureFormat format, std::size_t component) { |  | ||||||
|     switch (format) { |  | ||||||
|     case TextureFormat::R32G32B32A32: |  | ||||||
|         return 32; |  | ||||||
|     case TextureFormat::R16G16B16A16: |  | ||||||
|         return 16; |  | ||||||
|     case TextureFormat::R32G32B32: |  | ||||||
|         return component <= 2 ? 32 : 0; |  | ||||||
|     case TextureFormat::R32G32: |  | ||||||
|         return component <= 1 ? 32 : 0; |  | ||||||
|     case TextureFormat::R16G16: |  | ||||||
|         return component <= 1 ? 16 : 0; |  | ||||||
|     case TextureFormat::R32: |  | ||||||
|         return component == 0 ? 32 : 0; |  | ||||||
|     case TextureFormat::R16: |  | ||||||
|         return component == 0 ? 16 : 0; |  | ||||||
|     case TextureFormat::R8: |  | ||||||
|         return component == 0 ? 8 : 0; |  | ||||||
|     case TextureFormat::R1: |  | ||||||
|         return component == 0 ? 1 : 0; |  | ||||||
|     case TextureFormat::A8R8G8B8: |  | ||||||
|         return 8; |  | ||||||
|     case TextureFormat::A2B10G10R10: |  | ||||||
|         return (component == 3 || component == 2 || component == 1) ? 10 : 2; |  | ||||||
|     case TextureFormat::A4B4G4R4: |  | ||||||
|         return 4; |  | ||||||
|     case TextureFormat::A5B5G5R1: |  | ||||||
|         return (component == 0 || component == 1 || component == 2) ? 5 : 1; |  | ||||||
|     case TextureFormat::A1B5G5R5: |  | ||||||
|         return (component == 1 || component == 2 || component == 3) ? 5 : 1; |  | ||||||
|     case TextureFormat::R32_B24G8: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return 32; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return 24; |  | ||||||
|         } |  | ||||||
|         if (component == 2) { |  | ||||||
|             return 8; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::B5G6R5: |  | ||||||
|         if (component == 0 || component == 2) { |  | ||||||
|             return 5; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return 6; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::B6G5R5: |  | ||||||
|         if (component == 1 || component == 2) { |  | ||||||
|             return 5; |  | ||||||
|         } |  | ||||||
|         if (component == 0) { |  | ||||||
|             return 6; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::B10G11R11: |  | ||||||
|         if (component == 1 || component == 2) { |  | ||||||
|             return 11; |  | ||||||
|         } |  | ||||||
|         if (component == 0) { |  | ||||||
|             return 10; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::R24G8: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return 8; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return 24; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::R8G24: |  | ||||||
|         if (component == 0) { |  | ||||||
|             return 24; |  | ||||||
|         } |  | ||||||
|         if (component == 1) { |  | ||||||
|             return 8; |  | ||||||
|         } |  | ||||||
|         return 0; |  | ||||||
|     case TextureFormat::R8G8: |  | ||||||
|         return (component == 0 || component == 1) ? 8 : 0; |  | ||||||
|     case TextureFormat::G4R4: |  | ||||||
|         return (component == 0 || component == 1) ? 4 : 0; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Texture format not implemented={}", format); |  | ||||||
|         return 0; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::size_t GetImageComponentMask(TextureFormat format) { |  | ||||||
|     constexpr u8 R = 0b0001; |  | ||||||
|     constexpr u8 G = 0b0010; |  | ||||||
|     constexpr u8 B = 0b0100; |  | ||||||
|     constexpr u8 A = 0b1000; |  | ||||||
|     switch (format) { |  | ||||||
|     case TextureFormat::R32G32B32A32: |  | ||||||
|     case TextureFormat::R16G16B16A16: |  | ||||||
|     case TextureFormat::A8R8G8B8: |  | ||||||
|     case TextureFormat::A2B10G10R10: |  | ||||||
|     case TextureFormat::A4B4G4R4: |  | ||||||
|     case TextureFormat::A5B5G5R1: |  | ||||||
|     case TextureFormat::A1B5G5R5: |  | ||||||
|         return std::size_t{R | G | B | A}; |  | ||||||
|     case TextureFormat::R32G32B32: |  | ||||||
|     case TextureFormat::R32_B24G8: |  | ||||||
|     case TextureFormat::B5G6R5: |  | ||||||
|     case TextureFormat::B6G5R5: |  | ||||||
|     case TextureFormat::B10G11R11: |  | ||||||
|         return std::size_t{R | G | B}; |  | ||||||
|     case TextureFormat::R32G32: |  | ||||||
|     case TextureFormat::R16G16: |  | ||||||
|     case TextureFormat::R24G8: |  | ||||||
|     case TextureFormat::R8G24: |  | ||||||
|     case TextureFormat::R8G8: |  | ||||||
|     case TextureFormat::G4R4: |  | ||||||
|         return std::size_t{R | G}; |  | ||||||
|     case TextureFormat::R32: |  | ||||||
|     case TextureFormat::R16: |  | ||||||
|     case TextureFormat::R8: |  | ||||||
|     case TextureFormat::R1: |  | ||||||
|         return std::size_t{R}; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Texture format not implemented={}", format); |  | ||||||
|         return std::size_t{R | G | B | A}; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { |  | ||||||
|     switch (image_type) { |  | ||||||
|     case Tegra::Shader::ImageType::Texture1D: |  | ||||||
|     case Tegra::Shader::ImageType::TextureBuffer: |  | ||||||
|         return 1; |  | ||||||
|     case Tegra::Shader::ImageType::Texture1DArray: |  | ||||||
|     case Tegra::Shader::ImageType::Texture2D: |  | ||||||
|         return 2; |  | ||||||
|     case Tegra::Shader::ImageType::Texture2DArray: |  | ||||||
|     case Tegra::Shader::ImageType::Texture3D: |  | ||||||
|         return 3; |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return 1; |  | ||||||
| } |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, |  | ||||||
|                                                   Node original_value) { |  | ||||||
|     switch (component_type) { |  | ||||||
|     case ComponentType::SNORM: { |  | ||||||
|         // range [-1.0, 1.0]
 |  | ||||||
|         auto cnv_value = Operation(OperationCode::FMul, original_value, |  | ||||||
|                                    Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); |  | ||||||
|         cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); |  | ||||||
|         return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; |  | ||||||
|     } |  | ||||||
|     case ComponentType::SINT: |  | ||||||
|     case ComponentType::UNORM: { |  | ||||||
|         bool is_signed = component_type == ComponentType::SINT; |  | ||||||
|         // range [0.0, 1.0]
 |  | ||||||
|         auto cnv_value = Operation(OperationCode::FMul, original_value, |  | ||||||
|                                    Immediate(static_cast<float>(1 << component_size) - 1.f)); |  | ||||||
|         return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), |  | ||||||
|                 is_signed}; |  | ||||||
|     } |  | ||||||
|     case ComponentType::UINT: // range [0, (1 << component_size) - 1]
 |  | ||||||
|         return {std::move(original_value), false}; |  | ||||||
|     case ComponentType::FLOAT: |  | ||||||
|         if (component_size == 16) { |  | ||||||
|             return {Operation(OperationCode::HCastFloat, original_value), true}; |  | ||||||
|         } else { |  | ||||||
|             return {std::move(original_value), true}; |  | ||||||
|         } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); |  | ||||||
|         return {std::move(original_value), true}; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { |  | ||||||
|         std::vector<Node> coords; |  | ||||||
|         const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; |  | ||||||
|         coords.reserve(num_coords); |  | ||||||
|         for (std::size_t i = 0; i < num_coords; ++i) { |  | ||||||
|             coords.push_back(GetRegister(instr.gpr8.Value() + i)); |  | ||||||
|         } |  | ||||||
|         return coords; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::SULD: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |  | ||||||
|                          Tegra::Shader::OutOfBoundsStore::Ignore); |  | ||||||
| 
 |  | ||||||
|         const auto type{instr.suldst.image_type}; |  | ||||||
|         auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) |  | ||||||
|                                               : GetBindlessImage(instr.gpr39, type)}; |  | ||||||
|         image.MarkRead(); |  | ||||||
| 
 |  | ||||||
|         if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { |  | ||||||
|             u32 indexer = 0; |  | ||||||
|             for (u32 element = 0; element < 4; ++element) { |  | ||||||
|                 if (!instr.suldst.IsComponentEnabled(element)) { |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|                 MetaImage meta{image, {}, element}; |  | ||||||
|                 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); |  | ||||||
|                 SetTemporary(bb, indexer++, std::move(value)); |  | ||||||
|             } |  | ||||||
|             for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|             } |  | ||||||
|         } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { |  | ||||||
|             UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && |  | ||||||
|                              instr.suldst.GetStoreDataLayout() != StoreType::Bits64); |  | ||||||
| 
 |  | ||||||
|             auto descriptor = [this, instr] { |  | ||||||
|                 std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; |  | ||||||
|                 if (instr.suldst.is_immediate) { |  | ||||||
|                     sampler_descriptor = |  | ||||||
|                         registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); |  | ||||||
|                 } else { |  | ||||||
|                     const Node image_register = GetRegister(instr.gpr39); |  | ||||||
|                     const auto result = TrackCbuf(image_register, global_code, |  | ||||||
|                                                   static_cast<s64>(global_code.size())); |  | ||||||
|                     const auto buffer = std::get<1>(result); |  | ||||||
|                     const auto offset = std::get<2>(result); |  | ||||||
|                     sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); |  | ||||||
|                 } |  | ||||||
|                 if (!sampler_descriptor) { |  | ||||||
|                     UNREACHABLE_MSG("Failed to obtain image descriptor"); |  | ||||||
|                 } |  | ||||||
|                 return *sampler_descriptor; |  | ||||||
|             }(); |  | ||||||
| 
 |  | ||||||
|             const auto comp_mask = GetImageComponentMask(descriptor.format); |  | ||||||
| 
 |  | ||||||
|             switch (instr.suldst.GetStoreDataLayout()) { |  | ||||||
|             case StoreType::Bits32: |  | ||||||
|             case StoreType::Bits64: { |  | ||||||
|                 u32 indexer = 0; |  | ||||||
|                 u32 shifted_counter = 0; |  | ||||||
|                 Node value = Immediate(0); |  | ||||||
|                 for (u32 element = 0; element < 4; ++element) { |  | ||||||
|                     if (!IsComponentEnabled(comp_mask, element)) { |  | ||||||
|                         continue; |  | ||||||
|                     } |  | ||||||
|                     const auto component_type = GetComponentType(descriptor, element); |  | ||||||
|                     const auto component_size = GetComponentSize(descriptor.format, element); |  | ||||||
|                     MetaImage meta{image, {}, element}; |  | ||||||
| 
 |  | ||||||
|                     auto [converted_value, is_signed] = GetComponentValue( |  | ||||||
|                         component_type, component_size, |  | ||||||
|                         Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); |  | ||||||
| 
 |  | ||||||
|                     // shift element to correct position
 |  | ||||||
|                     const auto shifted = shifted_counter; |  | ||||||
|                     if (shifted > 0) { |  | ||||||
|                         converted_value = |  | ||||||
|                             SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, |  | ||||||
|                                             std::move(converted_value), Immediate(shifted)); |  | ||||||
|                     } |  | ||||||
|                     shifted_counter += component_size; |  | ||||||
| 
 |  | ||||||
|                     // add value into result
 |  | ||||||
|                     value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); |  | ||||||
| 
 |  | ||||||
|                     // if we shifted enough for 1 byte -> we save it into temp
 |  | ||||||
|                     if (shifted_counter >= 32) { |  | ||||||
|                         SetTemporary(bb, indexer++, std::move(value)); |  | ||||||
|                         // reset counter and value to prepare pack next byte
 |  | ||||||
|                         value = Immediate(0); |  | ||||||
|                         shifted_counter = 0; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|                     SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SUST: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); |  | ||||||
|         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |  | ||||||
|                          Tegra::Shader::OutOfBoundsStore::Ignore); |  | ||||||
|         UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
 |  | ||||||
| 
 |  | ||||||
|         std::vector<Node> values; |  | ||||||
|         constexpr std::size_t hardcoded_size{4}; |  | ||||||
|         for (std::size_t i = 0; i < hardcoded_size; ++i) { |  | ||||||
|             values.push_back(GetRegister(instr.gpr0.Value() + i)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const auto type{instr.suldst.image_type}; |  | ||||||
|         auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) |  | ||||||
|                                               : GetBindlessImage(instr.gpr39, type)}; |  | ||||||
|         image.MarkWrite(); |  | ||||||
| 
 |  | ||||||
|         MetaImage meta{image, std::move(values)}; |  | ||||||
|         bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SUATOM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); |  | ||||||
| 
 |  | ||||||
|         const OperationCode operation_code = [instr] { |  | ||||||
|             switch (instr.suatom_d.operation_type) { |  | ||||||
|             case Tegra::Shader::ImageAtomicOperationType::S32: |  | ||||||
|             case Tegra::Shader::ImageAtomicOperationType::U32: |  | ||||||
|                 switch (instr.suatom_d.operation) { |  | ||||||
|                 case Tegra::Shader::ImageAtomicOperation::Add: |  | ||||||
|                     return OperationCode::AtomicImageAdd; |  | ||||||
|                 case Tegra::Shader::ImageAtomicOperation::And: |  | ||||||
|                     return OperationCode::AtomicImageAnd; |  | ||||||
|                 case Tegra::Shader::ImageAtomicOperation::Or: |  | ||||||
|                     return OperationCode::AtomicImageOr; |  | ||||||
|                 case Tegra::Shader::ImageAtomicOperation::Xor: |  | ||||||
|                     return OperationCode::AtomicImageXor; |  | ||||||
|                 case Tegra::Shader::ImageAtomicOperation::Exch: |  | ||||||
|                     return OperationCode::AtomicImageExchange; |  | ||||||
|                 default: |  | ||||||
|                     break; |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|             default: |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", |  | ||||||
|                               static_cast<u64>(instr.suatom_d.operation.Value()), |  | ||||||
|                               static_cast<u64>(instr.suatom_d.operation_type.Value())); |  | ||||||
|             return OperationCode::AtomicImageAdd; |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         Node value = GetRegister(instr.gpr0); |  | ||||||
| 
 |  | ||||||
|         const auto type = instr.suatom_d.image_type; |  | ||||||
|         auto& image = GetImage(instr.image, type); |  | ||||||
|         image.MarkAtomic(); |  | ||||||
| 
 |  | ||||||
|         MetaImage meta{image, {std::move(value)}}; |  | ||||||
|         SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { |  | ||||||
|     const auto offset = static_cast<u32>(image.index.Value()); |  | ||||||
| 
 |  | ||||||
|     const auto it = |  | ||||||
|         std::find_if(std::begin(used_images), std::end(used_images), |  | ||||||
|                      [offset](const ImageEntry& entry) { return entry.offset == offset; }); |  | ||||||
|     if (it != std::end(used_images)) { |  | ||||||
|         ASSERT(!it->is_bindless && it->type == type); |  | ||||||
|         return *it; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto next_index = static_cast<u32>(used_images.size()); |  | ||||||
|     return used_images.emplace_back(next_index, offset, type); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { |  | ||||||
|     const Node image_register = GetRegister(reg); |  | ||||||
|     const auto result = |  | ||||||
|         TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); |  | ||||||
| 
 |  | ||||||
|     const auto buffer = std::get<1>(result); |  | ||||||
|     const auto offset = std::get<2>(result); |  | ||||||
| 
 |  | ||||||
|     const auto it = std::find_if(std::begin(used_images), std::end(used_images), |  | ||||||
|                                  [buffer, offset](const ImageEntry& entry) { |  | ||||||
|                                      return entry.buffer == buffer && entry.offset == offset; |  | ||||||
|                                  }); |  | ||||||
|     if (it != std::end(used_images)) { |  | ||||||
|         ASSERT(it->is_bindless && it->type == type); |  | ||||||
|         return *it; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto next_index = static_cast<u32>(used_images.size()); |  | ||||||
|     return used_images.emplace_back(next_index, offset, buffer, type); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,49 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
| 
 |  | ||||||
|     const Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     const Node op_b = [&]() { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
 |  | ||||||
|     // is true, and to 0 otherwise.
 |  | ||||||
|     const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); |  | ||||||
|     const Node first_pred = |  | ||||||
|         GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.iset.op); |  | ||||||
| 
 |  | ||||||
|     const Node predicate = Operation(combiner, first_pred, second_pred); |  | ||||||
| 
 |  | ||||||
|     const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); |  | ||||||
|     const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); |  | ||||||
|     const Node value = |  | ||||||
|         Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); |  | ||||||
| 
 |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,53 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
| 
 |  | ||||||
|     const Node op_a = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|     const Node op_b = [&]() { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     // We can't use the constant predicate as destination.
 |  | ||||||
|     ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |  | ||||||
| 
 |  | ||||||
|     const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); |  | ||||||
|     const Node predicate = |  | ||||||
|         GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|     // Set the primary predicate to the result of Predicate OP SecondPredicate
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); |  | ||||||
|     const Node value = Operation(combiner, predicate, second_pred); |  | ||||||
|     SetPredicate(bb, instr.isetp.pred3, value); |  | ||||||
| 
 |  | ||||||
|     if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|         // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
 |  | ||||||
|         const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); |  | ||||||
|         SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,493 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <utility> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/alignment.h" |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::AtomicOp; |  | ||||||
| using Tegra::Shader::AtomicType; |  | ||||||
| using Tegra::Shader::Attribute; |  | ||||||
| using Tegra::Shader::GlobalAtomicType; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| using Tegra::Shader::StoreType; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| OperationCode GetAtomOperation(AtomicOp op) { |  | ||||||
|     switch (op) { |  | ||||||
|     case AtomicOp::Add: |  | ||||||
|         return OperationCode::AtomicIAdd; |  | ||||||
|     case AtomicOp::Min: |  | ||||||
|         return OperationCode::AtomicIMin; |  | ||||||
|     case AtomicOp::Max: |  | ||||||
|         return OperationCode::AtomicIMax; |  | ||||||
|     case AtomicOp::And: |  | ||||||
|         return OperationCode::AtomicIAnd; |  | ||||||
|     case AtomicOp::Or: |  | ||||||
|         return OperationCode::AtomicIOr; |  | ||||||
|     case AtomicOp::Xor: |  | ||||||
|         return OperationCode::AtomicIXor; |  | ||||||
|     case AtomicOp::Exch: |  | ||||||
|         return OperationCode::AtomicIExchange; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("op={}", op); |  | ||||||
|         return OperationCode::AtomicIAdd; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { |  | ||||||
|     return uniform_type == Tegra::Shader::UniformType::UnsignedByte || |  | ||||||
|            uniform_type == Tegra::Shader::UniformType::UnsignedShort; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { |  | ||||||
|     switch (uniform_type) { |  | ||||||
|     case Tegra::Shader::UniformType::UnsignedByte: |  | ||||||
|         return 0b11; |  | ||||||
|     case Tegra::Shader::UniformType::UnsignedShort: |  | ||||||
|         return 0b10; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         return 0; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { |  | ||||||
|     switch (uniform_type) { |  | ||||||
|     case Tegra::Shader::UniformType::UnsignedByte: |  | ||||||
|         return 8; |  | ||||||
|     case Tegra::Shader::UniformType::UnsignedShort: |  | ||||||
|         return 16; |  | ||||||
|     case Tegra::Shader::UniformType::Single: |  | ||||||
|         return 32; |  | ||||||
|     case Tegra::Shader::UniformType::Double: |  | ||||||
|         return 64; |  | ||||||
|     case Tegra::Shader::UniformType::Quad: |  | ||||||
|     case Tegra::Shader::UniformType::UnsignedQuad: |  | ||||||
|         return 128; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); |  | ||||||
|         return 32; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { |  | ||||||
|     Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); |  | ||||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); |  | ||||||
|     return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { |  | ||||||
|     Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); |  | ||||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); |  | ||||||
|     return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), |  | ||||||
|                      Immediate(size)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Sign16Extend(Node value) { |  | ||||||
|     Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); |  | ||||||
|     Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); |  | ||||||
|     Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); |  | ||||||
|     return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::LD_A: { |  | ||||||
|         // Note: Shouldn't this be interp mode flat? As in no interpolation made.
 |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |  | ||||||
|                              "Indirect attribute loads are not supported"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, |  | ||||||
|                              "Unaligned attribute loads are not supported"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && |  | ||||||
|                                  instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, |  | ||||||
|                              "Non-32 bits PHYS reads are not implemented"); |  | ||||||
| 
 |  | ||||||
|         const Node buffer{GetRegister(instr.gpr39)}; |  | ||||||
| 
 |  | ||||||
|         u64 next_element = instr.attribute.fmt20.element; |  | ||||||
|         auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); |  | ||||||
| 
 |  | ||||||
|         const auto LoadNextElement = [&](u32 reg_offset) { |  | ||||||
|             const Node attribute{instr.attribute.fmt20.IsPhysical() |  | ||||||
|                                      ? GetPhysicalInputAttribute(instr.gpr8, buffer) |  | ||||||
|                                      : GetInputAttribute(static_cast<Attribute::Index>(next_index), |  | ||||||
|                                                          next_element, buffer)}; |  | ||||||
| 
 |  | ||||||
|             SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); |  | ||||||
| 
 |  | ||||||
|             // Load the next attribute element into the following register. If the element
 |  | ||||||
|             // to load goes beyond the vec4 size, load the first element of the next
 |  | ||||||
|             // attribute.
 |  | ||||||
|             next_element = (next_element + 1) % 4; |  | ||||||
|             next_index = next_index + (next_element == 0 ? 1 : 0); |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; |  | ||||||
|         for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { |  | ||||||
|             LoadNextElement(reg_offset); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LD_C: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); |  | ||||||
| 
 |  | ||||||
|         Node index = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|         const Node op_a = |  | ||||||
|             GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); |  | ||||||
| 
 |  | ||||||
|         switch (instr.ld_c.type.Value()) { |  | ||||||
|         case Tegra::Shader::UniformType::Single: |  | ||||||
|             SetRegister(bb, instr.gpr0, op_a); |  | ||||||
|             break; |  | ||||||
| 
 |  | ||||||
|         case Tegra::Shader::UniformType::Double: { |  | ||||||
|             const Node op_b = |  | ||||||
|                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); |  | ||||||
| 
 |  | ||||||
|             SetTemporary(bb, 0, op_a); |  | ||||||
|             SetTemporary(bb, 1, op_b); |  | ||||||
|             SetRegister(bb, instr.gpr0, GetTemporary(0)); |  | ||||||
|             SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LD_L: |  | ||||||
|         LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::LD_S: { |  | ||||||
|         const auto GetAddress = [&](s32 offset) { |  | ||||||
|             ASSERT(offset % 4 == 0); |  | ||||||
|             const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); |  | ||||||
|             return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); |  | ||||||
|         }; |  | ||||||
|         const auto GetMemory = [&](s32 offset) { |  | ||||||
|             return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) |  | ||||||
|                                                              : GetLocalMemory(GetAddress(offset)); |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         switch (instr.ldst_sl.type.Value()) { |  | ||||||
|         case StoreType::Signed16: |  | ||||||
|             SetRegister(bb, instr.gpr0, |  | ||||||
|                         Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); |  | ||||||
|             break; |  | ||||||
|         case StoreType::Bits32: |  | ||||||
|         case StoreType::Bits64: |  | ||||||
|         case StoreType::Bits128: { |  | ||||||
|             const u32 count = [&] { |  | ||||||
|                 switch (instr.ldst_sl.type.Value()) { |  | ||||||
|                 case StoreType::Bits32: |  | ||||||
|                     return 1; |  | ||||||
|                 case StoreType::Bits64: |  | ||||||
|                     return 2; |  | ||||||
|                 case StoreType::Bits128: |  | ||||||
|                     return 4; |  | ||||||
|                 default: |  | ||||||
|                     UNREACHABLE(); |  | ||||||
|                     return 0; |  | ||||||
|                 } |  | ||||||
|             }(); |  | ||||||
|             for (u32 i = 0; i < count; ++i) { |  | ||||||
|                 SetTemporary(bb, i, GetMemory(i * 4)); |  | ||||||
|             } |  | ||||||
|             for (u32 i = 0; i < count; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), |  | ||||||
|                               instr.ldst_sl.type.Value()); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::LD: |  | ||||||
|     case OpCode::Id::LDG: { |  | ||||||
|         const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::LD: |  | ||||||
|                 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); |  | ||||||
|                 return instr.generic.type; |  | ||||||
|             case OpCode::Id::LDG: |  | ||||||
|                 return instr.ldg.type; |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         const auto [real_address_base, base_address, descriptor] = |  | ||||||
|             TrackGlobalMemory(bb, instr, true, false); |  | ||||||
| 
 |  | ||||||
|         const u32 size = GetMemorySize(type); |  | ||||||
|         const u32 count = Common::AlignUp(size, 32) / 32; |  | ||||||
|         if (!real_address_base || !base_address) { |  | ||||||
|             // Tracking failed, load zeroes.
 |  | ||||||
|             for (u32 i = 0; i < count; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         for (u32 i = 0; i < count; ++i) { |  | ||||||
|             const Node it_offset = Immediate(i * 4); |  | ||||||
|             const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |  | ||||||
|             Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |  | ||||||
| 
 |  | ||||||
|             // To handle unaligned loads get the bytes used to dereference global memory and extract
 |  | ||||||
|             // those bytes from the loaded u32.
 |  | ||||||
|             if (IsUnaligned(type)) { |  | ||||||
|                 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             SetTemporary(bb, i, gmem); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         for (u32 i = 0; i < count; ++i) { |  | ||||||
|             SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ST_A: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |  | ||||||
|                              "Indirect attribute loads are not supported"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, |  | ||||||
|                              "Unaligned attribute loads are not supported"); |  | ||||||
| 
 |  | ||||||
|         u64 element = instr.attribute.fmt20.element; |  | ||||||
|         auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); |  | ||||||
| 
 |  | ||||||
|         const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; |  | ||||||
|         for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { |  | ||||||
|             Node dest; |  | ||||||
|             if (instr.attribute.fmt20.patch) { |  | ||||||
|                 const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); |  | ||||||
|                 dest = MakeNode<PatchNode>(offset); |  | ||||||
|             } else { |  | ||||||
|                 dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, |  | ||||||
|                                           GetRegister(instr.gpr39)); |  | ||||||
|             } |  | ||||||
|             const auto src = GetRegister(instr.gpr0.Value() + reg_offset); |  | ||||||
| 
 |  | ||||||
|             bb.push_back(Operation(OperationCode::Assign, dest, src)); |  | ||||||
| 
 |  | ||||||
|             // Load the next attribute element into the following register. If the element to load
 |  | ||||||
|             // goes beyond the vec4 size, load the first element of the next attribute.
 |  | ||||||
|             element = (element + 1) % 4; |  | ||||||
|             index = index + (element == 0 ? 1 : 0); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ST_L: |  | ||||||
|         LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::ST_S: { |  | ||||||
|         const auto GetAddress = [&](s32 offset) { |  | ||||||
|             ASSERT(offset % 4 == 0); |  | ||||||
|             const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); |  | ||||||
|             return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; |  | ||||||
|         const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; |  | ||||||
|         const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; |  | ||||||
| 
 |  | ||||||
|         switch (instr.ldst_sl.type.Value()) { |  | ||||||
|         case StoreType::Bits128: |  | ||||||
|             (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); |  | ||||||
|             (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); |  | ||||||
|             [[fallthrough]]; |  | ||||||
|         case StoreType::Bits64: |  | ||||||
|             (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); |  | ||||||
|             [[fallthrough]]; |  | ||||||
|         case StoreType::Bits32: |  | ||||||
|             (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); |  | ||||||
|             break; |  | ||||||
|         case StoreType::Unsigned16: |  | ||||||
|         case StoreType::Signed16: { |  | ||||||
|             Node address = GetAddress(0); |  | ||||||
|             Node memory = (this->*get_memory)(address); |  | ||||||
|             (this->*set_memory)( |  | ||||||
|                 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), |  | ||||||
|                               instr.ldst_sl.type.Value()); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ST: |  | ||||||
|     case OpCode::Id::STG: { |  | ||||||
|         const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { |  | ||||||
|             switch (opcode->get().GetId()) { |  | ||||||
|             case OpCode::Id::ST: |  | ||||||
|                 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); |  | ||||||
|                 return instr.generic.type; |  | ||||||
|             case OpCode::Id::STG: |  | ||||||
|                 return instr.stg.type; |  | ||||||
|             default: |  | ||||||
|                 UNREACHABLE(); |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         // For unaligned reads we have to read memory too.
 |  | ||||||
|         const bool is_read = IsUnaligned(type); |  | ||||||
|         const auto [real_address_base, base_address, descriptor] = |  | ||||||
|             TrackGlobalMemory(bb, instr, is_read, true); |  | ||||||
|         if (!real_address_base || !base_address) { |  | ||||||
|             // Tracking failed, skip the store.
 |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const u32 size = GetMemorySize(type); |  | ||||||
|         const u32 count = Common::AlignUp(size, 32) / 32; |  | ||||||
|         for (u32 i = 0; i < count; ++i) { |  | ||||||
|             const Node it_offset = Immediate(i * 4); |  | ||||||
|             const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |  | ||||||
|             const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |  | ||||||
|             Node value = GetRegister(instr.gpr0.Value() + i); |  | ||||||
| 
 |  | ||||||
|             if (IsUnaligned(type)) { |  | ||||||
|                 const u32 mask = GetUnalignedMask(type); |  | ||||||
|                 value = InsertUnaligned(gmem, move(value), real_address, mask, size); |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             bb.push_back(Operation(OperationCode::Assign, gmem, value)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::RED: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", |  | ||||||
|                              instr.red.type.Value()); |  | ||||||
|         const auto [real_address, base_address, descriptor] = |  | ||||||
|             TrackGlobalMemory(bb, instr, true, true); |  | ||||||
|         if (!real_address || !base_address) { |  | ||||||
|             // Tracking failed, skip atomic.
 |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |  | ||||||
|         Node value = GetRegister(instr.gpr0); |  | ||||||
|         bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ATOM: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || |  | ||||||
|                                  instr.atom.operation == AtomicOp::Dec || |  | ||||||
|                                  instr.atom.operation == AtomicOp::SafeAdd, |  | ||||||
|                              "operation={}", instr.atom.operation.Value()); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || |  | ||||||
|                                  instr.atom.type == GlobalAtomicType::U64 || |  | ||||||
|                                  instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || |  | ||||||
|                                  instr.atom.type == GlobalAtomicType::F32_FTZ_RN, |  | ||||||
|                              "type={}", instr.atom.type.Value()); |  | ||||||
| 
 |  | ||||||
|         const auto [real_address, base_address, descriptor] = |  | ||||||
|             TrackGlobalMemory(bb, instr, true, true); |  | ||||||
|         if (!real_address || !base_address) { |  | ||||||
|             // Tracking failed, skip atomic.
 |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const bool is_signed = |  | ||||||
|             instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; |  | ||||||
|         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |  | ||||||
|         SetRegister(bb, instr.gpr0, |  | ||||||
|                     SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, |  | ||||||
|                                     GetRegister(instr.gpr20))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ATOMS: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || |  | ||||||
|                                  instr.atoms.operation == AtomicOp::Dec, |  | ||||||
|                              "operation={}", instr.atoms.operation.Value()); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || |  | ||||||
|                                  instr.atoms.type == AtomicType::U64, |  | ||||||
|                              "type={}", instr.atoms.type.Value()); |  | ||||||
|         const bool is_signed = |  | ||||||
|             instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; |  | ||||||
|         const s32 offset = instr.atoms.GetImmediateOffset(); |  | ||||||
|         Node address = GetRegister(instr.gpr8); |  | ||||||
|         address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); |  | ||||||
|         SetRegister(bb, instr.gpr0, |  | ||||||
|                     SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, |  | ||||||
|                                     GetSharedMemory(move(address)), GetRegister(instr.gpr20))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::AL2P: { |  | ||||||
|         // Ignore al2p.direction since we don't care about it.
 |  | ||||||
| 
 |  | ||||||
|         // Calculate emulation fake physical address.
 |  | ||||||
|         const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; |  | ||||||
|         const Node reg{GetRegister(instr.gpr8)}; |  | ||||||
|         const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; |  | ||||||
| 
 |  | ||||||
|         // Set the fake address to target register.
 |  | ||||||
|         SetRegister(bb, instr.gpr0, fake_address); |  | ||||||
| 
 |  | ||||||
|         // Signal the shader IR to declare all possible attributes and varyings
 |  | ||||||
|         uses_physical_attributes = true; |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, |  | ||||||
|                                                                      Instruction instr, |  | ||||||
|                                                                      bool is_read, bool is_write) { |  | ||||||
|     const auto addr_register{GetRegister(instr.gmem.gpr)}; |  | ||||||
|     const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |  | ||||||
| 
 |  | ||||||
|     const auto [base_address, index, offset] = |  | ||||||
|         TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |  | ||||||
|     ASSERT_OR_EXECUTE_MSG( |  | ||||||
|         base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, |  | ||||||
|         "Global memory tracking failed"); |  | ||||||
| 
 |  | ||||||
|     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |  | ||||||
| 
 |  | ||||||
|     const GlobalMemoryBase descriptor{index, offset}; |  | ||||||
|     const auto& entry = used_global_memory.try_emplace(descriptor).first; |  | ||||||
|     auto& usage = entry->second; |  | ||||||
|     usage.is_written |= is_write; |  | ||||||
|     usage.is_read |= is_read; |  | ||||||
| 
 |  | ||||||
|     const auto real_address = |  | ||||||
|         Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); |  | ||||||
| 
 |  | ||||||
|     return {real_address, base_address, descriptor}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,322 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::ConditionCode; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::IpaInterpMode; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::PixelImap; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| using Tegra::Shader::SystemVariable; |  | ||||||
| 
 |  | ||||||
| using Index = Tegra::Shader::Attribute::Index; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::NOP: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); |  | ||||||
|         UNIMPLEMENTED_IF(instr.nop.trigger != 0); |  | ||||||
|         // With the previous preconditions, this instruction is a no-operation.
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::EXIT: { |  | ||||||
|         const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); |  | ||||||
| 
 |  | ||||||
|         switch (instr.flow.cond) { |  | ||||||
|         case Tegra::Shader::FlowCondition::Always: |  | ||||||
|             bb.push_back(Operation(OperationCode::Exit)); |  | ||||||
|             if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|                 // If this is an unconditional exit then just end processing here,
 |  | ||||||
|                 // otherwise we have to account for the possibility of the condition
 |  | ||||||
|                 // not being met, so continue processing the next instruction.
 |  | ||||||
|                 pc = MAX_PROGRAM_LENGTH - 1; |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
| 
 |  | ||||||
|         case Tegra::Shader::FlowCondition::Fcsm_Tr: |  | ||||||
|             // TODO(bunnei): What is this used for? If we assume this conditon is not
 |  | ||||||
|             // satisifed, dual vertex shaders in Farming Simulator make more sense
 |  | ||||||
|             UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); |  | ||||||
|             break; |  | ||||||
| 
 |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::KIL: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); |  | ||||||
| 
 |  | ||||||
|         const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); |  | ||||||
| 
 |  | ||||||
|         bb.push_back(Operation(OperationCode::Discard)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::S2R: { |  | ||||||
|         const Node value = [this, instr] { |  | ||||||
|             switch (instr.sys20) { |  | ||||||
|             case SystemVariable::LaneId: |  | ||||||
|                 return Operation(OperationCode::ThreadId); |  | ||||||
|             case SystemVariable::InvocationId: |  | ||||||
|                 return Operation(OperationCode::InvocationId); |  | ||||||
|             case SystemVariable::Ydirection: |  | ||||||
|                 uses_y_negate = true; |  | ||||||
|                 return Operation(OperationCode::YNegate); |  | ||||||
|             case SystemVariable::InvocationInfo: |  | ||||||
|                 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); |  | ||||||
|                 return Immediate(0x00ff'0000U); |  | ||||||
|             case SystemVariable::WscaleFactorXY: |  | ||||||
|                 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); |  | ||||||
|                 return Immediate(0U); |  | ||||||
|             case SystemVariable::WscaleFactorZ: |  | ||||||
|                 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); |  | ||||||
|                 return Immediate(0U); |  | ||||||
|             case SystemVariable::Tid: { |  | ||||||
|                 Node val = Immediate(0); |  | ||||||
|                 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); |  | ||||||
|                 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); |  | ||||||
|                 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); |  | ||||||
|                 return val; |  | ||||||
|             } |  | ||||||
|             case SystemVariable::TidX: |  | ||||||
|                 return Operation(OperationCode::LocalInvocationIdX); |  | ||||||
|             case SystemVariable::TidY: |  | ||||||
|                 return Operation(OperationCode::LocalInvocationIdY); |  | ||||||
|             case SystemVariable::TidZ: |  | ||||||
|                 return Operation(OperationCode::LocalInvocationIdZ); |  | ||||||
|             case SystemVariable::CtaIdX: |  | ||||||
|                 return Operation(OperationCode::WorkGroupIdX); |  | ||||||
|             case SystemVariable::CtaIdY: |  | ||||||
|                 return Operation(OperationCode::WorkGroupIdY); |  | ||||||
|             case SystemVariable::CtaIdZ: |  | ||||||
|                 return Operation(OperationCode::WorkGroupIdZ); |  | ||||||
|             case SystemVariable::EqMask: |  | ||||||
|             case SystemVariable::LtMask: |  | ||||||
|             case SystemVariable::LeMask: |  | ||||||
|             case SystemVariable::GtMask: |  | ||||||
|             case SystemVariable::GeMask: |  | ||||||
|                 uses_warps = true; |  | ||||||
|                 switch (instr.sys20) { |  | ||||||
|                 case SystemVariable::EqMask: |  | ||||||
|                     return Operation(OperationCode::ThreadEqMask); |  | ||||||
|                 case SystemVariable::LtMask: |  | ||||||
|                     return Operation(OperationCode::ThreadLtMask); |  | ||||||
|                 case SystemVariable::LeMask: |  | ||||||
|                     return Operation(OperationCode::ThreadLeMask); |  | ||||||
|                 case SystemVariable::GtMask: |  | ||||||
|                     return Operation(OperationCode::ThreadGtMask); |  | ||||||
|                 case SystemVariable::GeMask: |  | ||||||
|                     return Operation(OperationCode::ThreadGeMask); |  | ||||||
|                 default: |  | ||||||
|                     UNREACHABLE(); |  | ||||||
|                     return Immediate(0u); |  | ||||||
|                 } |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); |  | ||||||
|                 return Immediate(0u); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::BRA: { |  | ||||||
|         Node branch; |  | ||||||
|         if (instr.bra.constant_buffer == 0) { |  | ||||||
|             const u32 target = pc + instr.bra.GetBranchTarget(); |  | ||||||
|             branch = Operation(OperationCode::Branch, Immediate(target)); |  | ||||||
|         } else { |  | ||||||
|             const u32 target = pc + 1; |  | ||||||
|             const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); |  | ||||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, |  | ||||||
|                                                  PRECISE, op_a, Immediate(3)); |  | ||||||
|             const Node operand = |  | ||||||
|                 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); |  | ||||||
|             branch = Operation(OperationCode::BranchIndirect, operand); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         if (cc != Tegra::Shader::ConditionCode::T) { |  | ||||||
|             bb.push_back(Conditional(GetConditionCode(cc), {branch})); |  | ||||||
|         } else { |  | ||||||
|             bb.push_back(branch); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::BRX: { |  | ||||||
|         Node operand; |  | ||||||
|         if (instr.brx.constant_buffer != 0) { |  | ||||||
|             const s32 target = pc + 1; |  | ||||||
|             const Node index = GetRegister(instr.gpr8); |  | ||||||
|             const Node op_a = |  | ||||||
|                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); |  | ||||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, |  | ||||||
|                                                  PRECISE, op_a, Immediate(3)); |  | ||||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); |  | ||||||
|         } else { |  | ||||||
|             const s32 target = pc + instr.brx.GetBranchExtend(); |  | ||||||
|             const Node op_a = GetRegister(instr.gpr8); |  | ||||||
|             const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, |  | ||||||
|                                                  PRECISE, op_a, Immediate(3)); |  | ||||||
|             operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); |  | ||||||
|         } |  | ||||||
|         const Node branch = Operation(OperationCode::BranchIndirect, operand); |  | ||||||
| 
 |  | ||||||
|         const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         if (cc != ConditionCode::T) { |  | ||||||
|             bb.push_back(Conditional(GetConditionCode(cc), {branch})); |  | ||||||
|         } else { |  | ||||||
|             bb.push_back(branch); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SSY: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |  | ||||||
|                              "Constant buffer flow is not supported"); |  | ||||||
| 
 |  | ||||||
|         if (disable_flow_stack) { |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
 |  | ||||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); |  | ||||||
|         bb.push_back( |  | ||||||
|             Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::PBK: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, |  | ||||||
|                              "Constant buffer PBK is not supported"); |  | ||||||
| 
 |  | ||||||
|         if (disable_flow_stack) { |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // PBK pushes to a stack the address where BRK will jump to.
 |  | ||||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); |  | ||||||
|         bb.push_back( |  | ||||||
|             Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SYNC: { |  | ||||||
|         const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); |  | ||||||
| 
 |  | ||||||
|         if (decompiled) { |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // The SYNC opcode jumps to the address previously set by the SSY opcode
 |  | ||||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::BRK: { |  | ||||||
|         const ConditionCode cc = instr.flow_condition_code; |  | ||||||
|         UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); |  | ||||||
|         if (decompiled) { |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // The BRK opcode jumps to the address previously set by the PBK opcode
 |  | ||||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::IPA: { |  | ||||||
|         const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; |  | ||||||
|         const auto attribute = instr.attribute.fmt28; |  | ||||||
|         const Index index = attribute.index; |  | ||||||
| 
 |  | ||||||
|         Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) |  | ||||||
|                                  : GetInputAttribute(index, attribute.element); |  | ||||||
| 
 |  | ||||||
|         // Code taken from Ryujinx.
 |  | ||||||
|         if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { |  | ||||||
|             const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); |  | ||||||
|             if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { |  | ||||||
|                 Node position_w = GetInputAttribute(Index::Position, 3); |  | ||||||
|                 value = Operation(OperationCode::FMul, move(value), move(position_w)); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { |  | ||||||
|             value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         value = GetSaturatedFloat(move(value), instr.ipa.saturate); |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::OUT_R: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, |  | ||||||
|                              "Stream buffer is not supported"); |  | ||||||
| 
 |  | ||||||
|         if (instr.out.emit) { |  | ||||||
|             // gpr0 is used to store the next address and gpr8 contains the address to emit.
 |  | ||||||
|             // Hardware uses pointers here but we just ignore it
 |  | ||||||
|             bb.push_back(Operation(OperationCode::EmitVertex)); |  | ||||||
|             SetRegister(bb, instr.gpr0, Immediate(0)); |  | ||||||
|         } |  | ||||||
|         if (instr.out.cut) { |  | ||||||
|             bb.push_back(Operation(OperationCode::EndPrimitive)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::ISBERD: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.isberd.o != 0); |  | ||||||
|         UNIMPLEMENTED_IF(instr.isberd.skew != 0); |  | ||||||
|         UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); |  | ||||||
|         UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); |  | ||||||
|         LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); |  | ||||||
|         SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::BAR: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); |  | ||||||
|         bb.push_back(Operation(OperationCode::Barrier)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::MEMBAR: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); |  | ||||||
|         const OperationCode type = [instr] { |  | ||||||
|             switch (instr.membar.type) { |  | ||||||
|             case Tegra::Shader::MembarType::CTA: |  | ||||||
|                 return OperationCode::MemoryBarrierGroup; |  | ||||||
|             case Tegra::Shader::MembarType::GL: |  | ||||||
|                 return OperationCode::MemoryBarrierGlobal; |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); |  | ||||||
|                 return OperationCode::MemoryBarrierGlobal; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         bb.push_back(Operation(type)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::DEPBAR: { |  | ||||||
|         LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,68 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::PSETP: { |  | ||||||
|         const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); |  | ||||||
|         const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); |  | ||||||
| 
 |  | ||||||
|         // We can't use the constant predicate as destination.
 |  | ||||||
|         ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |  | ||||||
| 
 |  | ||||||
|         const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); |  | ||||||
| 
 |  | ||||||
|         const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); |  | ||||||
|         const Node predicate = Operation(combiner, op_a, op_b); |  | ||||||
| 
 |  | ||||||
|         // Set the primary predicate to the result of Predicate OP SecondPredicate
 |  | ||||||
|         SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); |  | ||||||
| 
 |  | ||||||
|         if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|             // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
 |  | ||||||
|             // enabled
 |  | ||||||
|             SetPredicate(bb, instr.psetp.pred0, |  | ||||||
|                          Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), |  | ||||||
|                                    second_pred)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::CSETP: { |  | ||||||
|         const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); |  | ||||||
|         const Node condition_code = GetConditionCode(instr.csetp.cc); |  | ||||||
| 
 |  | ||||||
|         const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); |  | ||||||
| 
 |  | ||||||
|         if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|             SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); |  | ||||||
|         } |  | ||||||
|         if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|             const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); |  | ||||||
|             SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,46 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
| 
 |  | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                          "Condition codes generation in PSET is not implemented"); |  | ||||||
| 
 |  | ||||||
|     const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); |  | ||||||
|     const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); |  | ||||||
|     const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); |  | ||||||
| 
 |  | ||||||
|     const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); |  | ||||||
| 
 |  | ||||||
|     const OperationCode combiner = GetPredicateCombiner(instr.pset.op); |  | ||||||
|     const Node predicate = Operation(combiner, first_pred, second_pred); |  | ||||||
| 
 |  | ||||||
|     const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); |  | ||||||
|     const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); |  | ||||||
|     const Node value = |  | ||||||
|         Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); |  | ||||||
| 
 |  | ||||||
|     if (instr.pset.bf) { |  | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |  | ||||||
|     } else { |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|     } |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,86 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <utility> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| constexpr u64 NUM_CONDITION_CODES = 4; |  | ||||||
| constexpr u64 NUM_PREDICATES = 7; |  | ||||||
| } // namespace
 |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node apply_mask = [this, opcode, instr] { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::R2P_IMM: |  | ||||||
|         case OpCode::Id::P2R_IMM: |  | ||||||
|             return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; |  | ||||||
| 
 |  | ||||||
|     const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; |  | ||||||
|     const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; |  | ||||||
|     const auto get_entry = [this, cc](u64 entry) { |  | ||||||
|         return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::R2P_IMM: { |  | ||||||
|         Node mask = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|         for (u64 entry = 0; entry < num_entries; ++entry) { |  | ||||||
|             const u32 shift = static_cast<u32>(entry); |  | ||||||
| 
 |  | ||||||
|             Node apply = BitfieldExtract(apply_mask, shift, 1); |  | ||||||
|             Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); |  | ||||||
| 
 |  | ||||||
|             Node compare = BitfieldExtract(mask, offset + shift, 1); |  | ||||||
|             Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); |  | ||||||
| 
 |  | ||||||
|             Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); |  | ||||||
|             bb.push_back(Conditional(condition, {move(code)})); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::P2R_IMM: { |  | ||||||
|         Node value = Immediate(0); |  | ||||||
|         for (u64 entry = 0; entry < num_entries; ++entry) { |  | ||||||
|             Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), |  | ||||||
|                                  Immediate(0)); |  | ||||||
|             value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); |  | ||||||
|         } |  | ||||||
|         value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); |  | ||||||
|         value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,153 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::ShfType; |  | ||||||
| using Tegra::Shader::ShfXmode; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| Node IsFull(Node shift) { |  | ||||||
|     return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Shift(OperationCode opcode, Node value, Node shift) { |  | ||||||
|     Node shifted = Operation(opcode, move(value), shift); |  | ||||||
|     return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ClampShift(Node shift, s32 size = 32) { |  | ||||||
|     shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); |  | ||||||
|     return Operation(OperationCode::IMin, move(shift), Immediate(size)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node WrapShift(Node shift, s32 size = 32) { |  | ||||||
|     return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { |  | ||||||
|     // These values are used when the shift value is less than 32
 |  | ||||||
|     Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); |  | ||||||
|     Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); |  | ||||||
|     Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); |  | ||||||
| 
 |  | ||||||
|     if (type == ShfType::Bits32) { |  | ||||||
|         // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
 |  | ||||||
|         return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // And these when it's larger than or 32
 |  | ||||||
|     const bool is_signed = type == ShfType::S64; |  | ||||||
|     const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); |  | ||||||
|     Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); |  | ||||||
|     Node greater = Shift(opcode, high, move(reduced)); |  | ||||||
| 
 |  | ||||||
|     Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); |  | ||||||
|     Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); |  | ||||||
| 
 |  | ||||||
|     Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); |  | ||||||
|     return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { |  | ||||||
|     // These values are used when the shift value is less than 32
 |  | ||||||
|     Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); |  | ||||||
|     Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); |  | ||||||
|     Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); |  | ||||||
| 
 |  | ||||||
|     if (type == ShfType::Bits32) { |  | ||||||
|         // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
 |  | ||||||
|         return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // And these when it's larger than or 32
 |  | ||||||
|     Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); |  | ||||||
|     Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); |  | ||||||
| 
 |  | ||||||
|     Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); |  | ||||||
|     Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); |  | ||||||
| 
 |  | ||||||
|     Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); |  | ||||||
|     return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     Node op_b = [this, instr] { |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             return Immediate(instr.alu.GetSignedImm20_20()); |  | ||||||
|         } else if (instr.is_b_gpr) { |  | ||||||
|             return GetRegister(instr.gpr20); |  | ||||||
|         } else { |  | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     switch (const auto opid = opcode->get().GetId(); opid) { |  | ||||||
|     case OpCode::Id::SHR_C: |  | ||||||
|     case OpCode::Id::SHR_R: |  | ||||||
|     case OpCode::Id::SHR_IMM: { |  | ||||||
|         op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); |  | ||||||
| 
 |  | ||||||
|         Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, |  | ||||||
|                                      move(op_a), move(op_b)); |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SHL_C: |  | ||||||
|     case OpCode::Id::SHL_R: |  | ||||||
|     case OpCode::Id::SHL_IMM: { |  | ||||||
|         Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SHF_RIGHT_R: |  | ||||||
|     case OpCode::Id::SHF_RIGHT_IMM: |  | ||||||
|     case OpCode::Id::SHF_LEFT_R: |  | ||||||
|     case OpCode::Id::SHF_LEFT_IMM: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.generates_cc); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", |  | ||||||
|                              instr.shf.xmode.Value()); |  | ||||||
| 
 |  | ||||||
|         if (instr.is_b_imm) { |  | ||||||
|             op_b = Immediate(static_cast<u32>(instr.shf.immediate)); |  | ||||||
|         } |  | ||||||
|         const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; |  | ||||||
|         Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); |  | ||||||
| 
 |  | ||||||
|         Node negated_shift = Operation(OperationCode::INegate, shift); |  | ||||||
|         Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); |  | ||||||
| 
 |  | ||||||
|         const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; |  | ||||||
|         Node value = (is_right ? ShiftRight : ShiftLeft)( |  | ||||||
|             move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); |  | ||||||
| 
 |  | ||||||
|         SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,935 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <vector> |  | ||||||
| #include <fmt/format.h> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| using Tegra::Shader::TextureMiscMode; |  | ||||||
| using Tegra::Shader::TextureProcessMode; |  | ||||||
| using Tegra::Shader::TextureType; |  | ||||||
| 
 |  | ||||||
| static std::size_t GetCoordCount(TextureType texture_type) { |  | ||||||
|     switch (texture_type) { |  | ||||||
|     case TextureType::Texture1D: |  | ||||||
|         return 1; |  | ||||||
|     case TextureType::Texture2D: |  | ||||||
|         return 2; |  | ||||||
|     case TextureType::Texture3D: |  | ||||||
|     case TextureType::TextureCube: |  | ||||||
|         return 3; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); |  | ||||||
|         return 0; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
|     bool is_bindless = false; |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::TEX: { |  | ||||||
|         const TextureType texture_type{instr.tex.texture_type}; |  | ||||||
|         const bool is_array = instr.tex.array != 0; |  | ||||||
|         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); |  | ||||||
|         const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |  | ||||||
|         const auto process_mode = instr.tex.GetTextureProcessMode(); |  | ||||||
|         WriteTexInstructionFloat( |  | ||||||
|             bb, instr, |  | ||||||
|             GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TEX_B: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), |  | ||||||
|                              "AOFFI is not implemented"); |  | ||||||
| 
 |  | ||||||
|         const TextureType texture_type{instr.tex_b.texture_type}; |  | ||||||
|         const bool is_array = instr.tex_b.array != 0; |  | ||||||
|         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); |  | ||||||
|         const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); |  | ||||||
|         const auto process_mode = instr.tex_b.GetTextureProcessMode(); |  | ||||||
|         WriteTexInstructionFloat(bb, instr, |  | ||||||
|                                  GetTexCode(instr, texture_type, process_mode, depth_compare, |  | ||||||
|                                             is_array, is_aoffi, {instr.gpr20})); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TEXS: { |  | ||||||
|         const TextureType texture_type{instr.texs.GetTextureType()}; |  | ||||||
|         const bool is_array{instr.texs.IsArrayTexture()}; |  | ||||||
|         const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); |  | ||||||
|         const auto process_mode = instr.texs.GetTextureProcessMode(); |  | ||||||
| 
 |  | ||||||
|         const Node4 components = |  | ||||||
|             GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); |  | ||||||
| 
 |  | ||||||
|         if (instr.texs.fp32_flag) { |  | ||||||
|             WriteTexsInstructionFloat(bb, instr, components); |  | ||||||
|         } else { |  | ||||||
|             WriteTexsInstructionHalfFloat(bb, instr, components); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TLD4_B: { |  | ||||||
|         is_bindless = true; |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TLD4: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), |  | ||||||
|                              "NDV is not implemented"); |  | ||||||
|         const auto texture_type = instr.tld4.texture_type.Value(); |  | ||||||
|         const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) |  | ||||||
|                                                : instr.tld4.UsesMiscMode(TextureMiscMode::DC); |  | ||||||
|         const bool is_array = instr.tld4.array != 0; |  | ||||||
|         const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) |  | ||||||
|                                           : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); |  | ||||||
|         const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) |  | ||||||
|                                         : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); |  | ||||||
|         WriteTexInstructionFloat(bb, instr, |  | ||||||
|                                  GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, |  | ||||||
|                                              is_ptp, is_bindless)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TLD4S: { |  | ||||||
|         constexpr std::size_t num_coords = 2; |  | ||||||
|         const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); |  | ||||||
|         const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); |  | ||||||
|         const Node op_a = GetRegister(instr.gpr8); |  | ||||||
|         const Node op_b = GetRegister(instr.gpr20); |  | ||||||
| 
 |  | ||||||
|         // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
 |  | ||||||
|         std::vector<Node> coords; |  | ||||||
|         std::vector<Node> aoffi; |  | ||||||
|         Node depth_compare; |  | ||||||
|         if (is_depth_compare) { |  | ||||||
|             // Note: TLD4S coordinate encoding works just like TEXS's
 |  | ||||||
|             const Node op_y = GetRegister(instr.gpr8.Value() + 1); |  | ||||||
|             coords.push_back(op_a); |  | ||||||
|             coords.push_back(op_y); |  | ||||||
|             if (is_aoffi) { |  | ||||||
|                 aoffi = GetAoffiCoordinates(op_b, num_coords, true); |  | ||||||
|                 depth_compare = GetRegister(instr.gpr20.Value() + 1); |  | ||||||
|             } else { |  | ||||||
|                 depth_compare = op_b; |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             // There's no depth compare
 |  | ||||||
|             coords.push_back(op_a); |  | ||||||
|             if (is_aoffi) { |  | ||||||
|                 coords.push_back(GetRegister(instr.gpr8.Value() + 1)); |  | ||||||
|                 aoffi = GetAoffiCoordinates(op_b, num_coords, true); |  | ||||||
|             } else { |  | ||||||
|                 coords.push_back(op_b); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |  | ||||||
| 
 |  | ||||||
|         SamplerInfo info; |  | ||||||
|         info.is_shadow = is_depth_compare; |  | ||||||
|         const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |  | ||||||
| 
 |  | ||||||
|         Node4 values; |  | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|             MetaTexture meta{*sampler, {}, depth_compare, aoffi,   {}, {}, |  | ||||||
|                              {},       {}, component,     element, {}}; |  | ||||||
|             values[element] = Operation(OperationCode::TextureGather, meta, coords); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (instr.tld4s.fp16_flag) { |  | ||||||
|             WriteTexsInstructionHalfFloat(bb, instr, values, true); |  | ||||||
|         } else { |  | ||||||
|             WriteTexsInstructionFloat(bb, instr, values, true); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TXD_B: |  | ||||||
|         is_bindless = true; |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::TXD: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), |  | ||||||
|                              "AOFFI is not implemented"); |  | ||||||
| 
 |  | ||||||
|         const bool is_array = instr.txd.is_array != 0; |  | ||||||
|         const auto derivate_reg = instr.gpr20.Value(); |  | ||||||
|         const auto texture_type = instr.txd.texture_type.Value(); |  | ||||||
|         const auto coord_count = GetCoordCount(texture_type); |  | ||||||
|         u64 base_reg = instr.gpr8.Value(); |  | ||||||
|         Node index_var; |  | ||||||
|         SamplerInfo info; |  | ||||||
|         info.type = texture_type; |  | ||||||
|         info.is_array = is_array; |  | ||||||
|         const std::optional<SamplerEntry> sampler = |  | ||||||
|             is_bindless ? GetBindlessSampler(base_reg, info, index_var) |  | ||||||
|                         : GetSampler(instr.sampler, info); |  | ||||||
|         Node4 values; |  | ||||||
|         if (!sampler) { |  | ||||||
|             std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); |  | ||||||
|             WriteTexInstructionFloat(bb, instr, values); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (is_bindless) { |  | ||||||
|             base_reg++; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         std::vector<Node> coords; |  | ||||||
|         std::vector<Node> derivates; |  | ||||||
|         for (std::size_t i = 0; i < coord_count; ++i) { |  | ||||||
|             coords.push_back(GetRegister(base_reg + i)); |  | ||||||
|             const std::size_t derivate = i * 2; |  | ||||||
|             derivates.push_back(GetRegister(derivate_reg + derivate)); |  | ||||||
|             derivates.push_back(GetRegister(derivate_reg + derivate + 1)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         Node array_node = {}; |  | ||||||
|         if (is_array) { |  | ||||||
|             const Node info_reg = GetRegister(base_reg + coord_count); |  | ||||||
|             array_node = BitfieldExtract(info_reg, 0, 16); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|             MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates, |  | ||||||
|                              {},       {},         {}, element, index_var}; |  | ||||||
|             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         WriteTexInstructionFloat(bb, instr, values); |  | ||||||
| 
 |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TXQ_B: |  | ||||||
|         is_bindless = true; |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::TXQ: { |  | ||||||
|         Node index_var; |  | ||||||
|         const std::optional<SamplerEntry> sampler = |  | ||||||
|             is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) |  | ||||||
|                         : GetSampler(instr.sampler, {}); |  | ||||||
| 
 |  | ||||||
|         if (!sampler) { |  | ||||||
|             u32 indexer = 0; |  | ||||||
|             for (u32 element = 0; element < 4; ++element) { |  | ||||||
|                 if (!instr.txq.IsComponentEnabled(element)) { |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|                 const Node value = Immediate(0); |  | ||||||
|                 SetTemporary(bb, indexer++, value); |  | ||||||
|             } |  | ||||||
|             for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         u32 indexer = 0; |  | ||||||
|         switch (instr.txq.query_type) { |  | ||||||
|         case Tegra::Shader::TextureQueryType::Dimension: { |  | ||||||
|             for (u32 element = 0; element < 4; ++element) { |  | ||||||
|                 if (!instr.txq.IsComponentEnabled(element)) { |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|                 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |  | ||||||
|                 const Node value = |  | ||||||
|                     Operation(OperationCode::TextureQueryDimensions, meta, |  | ||||||
|                               GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |  | ||||||
|                 SetTemporary(bb, indexer++, value); |  | ||||||
|             } |  | ||||||
|             for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TMML_B: |  | ||||||
|         is_bindless = true; |  | ||||||
|         [[fallthrough]]; |  | ||||||
|     case OpCode::Id::TMML: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), |  | ||||||
|                              "NDV is not implemented"); |  | ||||||
| 
 |  | ||||||
|         const auto texture_type = instr.tmml.texture_type.Value(); |  | ||||||
|         const bool is_array = instr.tmml.array != 0; |  | ||||||
|         SamplerInfo info; |  | ||||||
|         info.type = texture_type; |  | ||||||
|         info.is_array = is_array; |  | ||||||
|         Node index_var; |  | ||||||
|         const std::optional<SamplerEntry> sampler = |  | ||||||
|             is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) |  | ||||||
|                         : GetSampler(instr.sampler, info); |  | ||||||
| 
 |  | ||||||
|         if (!sampler) { |  | ||||||
|             u32 indexer = 0; |  | ||||||
|             for (u32 element = 0; element < 2; ++element) { |  | ||||||
|                 if (!instr.tmml.IsComponentEnabled(element)) { |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|                 const Node value = Immediate(0); |  | ||||||
|                 SetTemporary(bb, indexer++, value); |  | ||||||
|             } |  | ||||||
|             for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|             } |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const u64 base_index = is_array ? 1 : 0; |  | ||||||
|         const u64 num_components = [texture_type] { |  | ||||||
|             switch (texture_type) { |  | ||||||
|             case TextureType::Texture1D: |  | ||||||
|                 return 1; |  | ||||||
|             case TextureType::Texture2D: |  | ||||||
|                 return 2; |  | ||||||
|             case TextureType::TextureCube: |  | ||||||
|                 return 3; |  | ||||||
|             default: |  | ||||||
|                 UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); |  | ||||||
|                 return 2; |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
|         // TODO: What's the array component used for?
 |  | ||||||
| 
 |  | ||||||
|         std::vector<Node> coords; |  | ||||||
|         coords.reserve(num_components); |  | ||||||
|         for (u64 component = 0; component < num_components; ++component) { |  | ||||||
|             coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         u32 indexer = 0; |  | ||||||
|         for (u32 element = 0; element < 2; ++element) { |  | ||||||
|             if (!instr.tmml.IsComponentEnabled(element)) { |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |  | ||||||
|             Node value = Operation(OperationCode::TextureQueryLod, meta, coords); |  | ||||||
|             SetTemporary(bb, indexer++, std::move(value)); |  | ||||||
|         } |  | ||||||
|         for (u32 i = 0; i < indexer; ++i) { |  | ||||||
|             SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TLD: { |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); |  | ||||||
| 
 |  | ||||||
|         WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::TLDS: { |  | ||||||
|         const TextureType texture_type{instr.tlds.GetTextureType()}; |  | ||||||
|         const bool is_array{instr.tlds.IsArrayTexture()}; |  | ||||||
| 
 |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), |  | ||||||
|                              "AOFFI is not implemented"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); |  | ||||||
| 
 |  | ||||||
|         const Node4 components = GetTldsCode(instr, texture_type, is_array); |  | ||||||
| 
 |  | ||||||
|         if (instr.tlds.fp32_flag) { |  | ||||||
|             WriteTexsInstructionFloat(bb, instr, components); |  | ||||||
|         } else { |  | ||||||
|             WriteTexsInstructionHalfFloat(bb, instr, components); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( |  | ||||||
|     SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { |  | ||||||
|     if (info.IsComplete()) { |  | ||||||
|         return info; |  | ||||||
|     } |  | ||||||
|     if (!sampler) { |  | ||||||
|         LOG_WARNING(HW_GPU, "Unknown sampler info"); |  | ||||||
|         info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); |  | ||||||
|         info.is_array = info.is_array.value_or(false); |  | ||||||
|         info.is_shadow = info.is_shadow.value_or(false); |  | ||||||
|         info.is_buffer = info.is_buffer.value_or(false); |  | ||||||
|         return info; |  | ||||||
|     } |  | ||||||
|     info.type = info.type.value_or(sampler->texture_type); |  | ||||||
|     info.is_array = info.is_array.value_or(sampler->is_array != 0); |  | ||||||
|     info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); |  | ||||||
|     info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); |  | ||||||
|     return info; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, |  | ||||||
|                                                  SamplerInfo sampler_info) { |  | ||||||
|     const u32 offset = static_cast<u32>(sampler.index.Value()); |  | ||||||
|     const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); |  | ||||||
| 
 |  | ||||||
|     // If this sampler has already been used, return the existing mapping.
 |  | ||||||
|     const auto it = |  | ||||||
|         std::find_if(used_samplers.begin(), used_samplers.end(), |  | ||||||
|                      [offset](const SamplerEntry& entry) { return entry.offset == offset; }); |  | ||||||
|     if (it != used_samplers.end()) { |  | ||||||
|         ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |  | ||||||
|                it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |  | ||||||
|         return *it; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Otherwise create a new mapping for this sampler
 |  | ||||||
|     const auto next_index = static_cast<u32>(used_samplers.size()); |  | ||||||
|     return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, |  | ||||||
|                                       *info.is_shadow, *info.is_buffer, false); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, |  | ||||||
|                                                          SamplerInfo info, Node& index_var) { |  | ||||||
|     const Node sampler_register = GetRegister(reg); |  | ||||||
|     const auto [base_node, tracked_sampler_info] = |  | ||||||
|         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |  | ||||||
|     if (!base_node) { |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { |  | ||||||
|         const u32 buffer = sampler_info->index; |  | ||||||
|         const u32 offset = sampler_info->offset; |  | ||||||
|         info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); |  | ||||||
| 
 |  | ||||||
|         // If this sampler has already been used, return the existing mapping.
 |  | ||||||
|         const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), |  | ||||||
|                                      [buffer, offset](const SamplerEntry& entry) { |  | ||||||
|                                          return entry.buffer == buffer && entry.offset == offset; |  | ||||||
|                                      }); |  | ||||||
|         if (it != used_samplers.end()) { |  | ||||||
|             ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && |  | ||||||
|                    it->is_shadow == info.is_shadow); |  | ||||||
|             return *it; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Otherwise create a new mapping for this sampler
 |  | ||||||
|         const auto next_index = static_cast<u32>(used_samplers.size()); |  | ||||||
|         return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, |  | ||||||
|                                           *info.is_shadow, *info.is_buffer, false); |  | ||||||
|     } |  | ||||||
|     if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { |  | ||||||
|         const std::pair indices = sampler_info->indices; |  | ||||||
|         const std::pair offsets = sampler_info->offsets; |  | ||||||
|         info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); |  | ||||||
| 
 |  | ||||||
|         // Try to use an already created sampler if it exists
 |  | ||||||
|         const auto it = |  | ||||||
|             std::find_if(used_samplers.begin(), used_samplers.end(), |  | ||||||
|                          [indices, offsets](const SamplerEntry& entry) { |  | ||||||
|                              return offsets == std::pair{entry.offset, entry.secondary_offset} && |  | ||||||
|                                     indices == std::pair{entry.buffer, entry.secondary_buffer}; |  | ||||||
|                          }); |  | ||||||
|         if (it != used_samplers.end()) { |  | ||||||
|             ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && |  | ||||||
|                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); |  | ||||||
|             return *it; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Otherwise create a new mapping for this sampler
 |  | ||||||
|         const u32 next_index = static_cast<u32>(used_samplers.size()); |  | ||||||
|         return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, |  | ||||||
|                                           *info.is_shadow, *info.is_buffer); |  | ||||||
|     } |  | ||||||
|     if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { |  | ||||||
|         const u32 base_offset = sampler_info->base_offset / 4; |  | ||||||
|         index_var = GetCustomVariable(sampler_info->bindless_var); |  | ||||||
|         info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); |  | ||||||
| 
 |  | ||||||
|         // If this sampler has already been used, return the existing mapping.
 |  | ||||||
|         const auto it = std::find_if( |  | ||||||
|             used_samplers.begin(), used_samplers.end(), |  | ||||||
|             [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); |  | ||||||
|         if (it != used_samplers.end()) { |  | ||||||
|             ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && |  | ||||||
|                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && |  | ||||||
|                    it->is_indexed); |  | ||||||
|             return *it; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         uses_indexed_samplers = true; |  | ||||||
|         // Otherwise create a new mapping for this sampler
 |  | ||||||
|         const auto next_index = static_cast<u32>(used_samplers.size()); |  | ||||||
|         return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, |  | ||||||
|                                           *info.is_shadow, *info.is_buffer, true); |  | ||||||
|     } |  | ||||||
|     return std::nullopt; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { |  | ||||||
|     u32 dest_elem = 0; |  | ||||||
|     for (u32 elem = 0; elem < 4; ++elem) { |  | ||||||
|         if (!instr.tex.IsComponentEnabled(elem)) { |  | ||||||
|             // Skip disabled components
 |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         SetTemporary(bb, dest_elem++, components[elem]); |  | ||||||
|     } |  | ||||||
|     // After writing values in temporals, move them to the real registers
 |  | ||||||
|     for (u32 i = 0; i < dest_elem; ++i) { |  | ||||||
|         SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, |  | ||||||
|                                          bool ignore_mask) { |  | ||||||
|     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
 |  | ||||||
|     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 |  | ||||||
| 
 |  | ||||||
|     u32 dest_elem = 0; |  | ||||||
|     for (u32 component = 0; component < 4; ++component) { |  | ||||||
|         if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) |  | ||||||
|             continue; |  | ||||||
|         SetTemporary(bb, dest_elem++, components[component]); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     for (u32 i = 0; i < dest_elem; ++i) { |  | ||||||
|         if (i < 2) { |  | ||||||
|             // Write the first two swizzle components to gpr0 and gpr0+1
 |  | ||||||
|             SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); |  | ||||||
|         } else { |  | ||||||
|             ASSERT(instr.texs.HasTwoDestinations()); |  | ||||||
|             // Write the rest of the swizzle components to gpr28 and gpr28+1
 |  | ||||||
|             SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, |  | ||||||
|                                              const Node4& components, bool ignore_mask) { |  | ||||||
|     // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
 |  | ||||||
|     // float instruction).
 |  | ||||||
| 
 |  | ||||||
|     Node4 values; |  | ||||||
|     u32 dest_elem = 0; |  | ||||||
|     for (u32 component = 0; component < 4; ++component) { |  | ||||||
|         if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) |  | ||||||
|             continue; |  | ||||||
|         values[dest_elem++] = components[component]; |  | ||||||
|     } |  | ||||||
|     if (dest_elem == 0) |  | ||||||
|         return; |  | ||||||
| 
 |  | ||||||
|     std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); |  | ||||||
| 
 |  | ||||||
|     const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); |  | ||||||
|     if (dest_elem <= 2) { |  | ||||||
|         SetRegister(bb, instr.gpr0, first_value); |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     SetTemporary(bb, 0, first_value); |  | ||||||
|     SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); |  | ||||||
| 
 |  | ||||||
|     SetRegister(bb, instr.gpr0, GetTemporary(0)); |  | ||||||
|     SetRegister(bb, instr.gpr28, GetTemporary(1)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |  | ||||||
|                                TextureProcessMode process_mode, std::vector<Node> coords, |  | ||||||
|                                Node array, Node depth_compare, u32 bias_offset, |  | ||||||
|                                std::vector<Node> aoffi, |  | ||||||
|                                std::optional<Tegra::Shader::Register> bindless_reg) { |  | ||||||
|     const bool is_array = array != nullptr; |  | ||||||
|     const bool is_shadow = depth_compare != nullptr; |  | ||||||
|     const bool is_bindless = bindless_reg.has_value(); |  | ||||||
| 
 |  | ||||||
|     ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, |  | ||||||
|                "Illegal texture type"); |  | ||||||
| 
 |  | ||||||
|     SamplerInfo info; |  | ||||||
|     info.type = texture_type; |  | ||||||
|     info.is_array = is_array; |  | ||||||
|     info.is_shadow = is_shadow; |  | ||||||
|     info.is_buffer = false; |  | ||||||
| 
 |  | ||||||
|     Node index_var; |  | ||||||
|     const std::optional<SamplerEntry> sampler = |  | ||||||
|         is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) |  | ||||||
|                     : GetSampler(instr.sampler, info); |  | ||||||
|     if (!sampler) { |  | ||||||
|         return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const bool lod_needed = process_mode == TextureProcessMode::LZ || |  | ||||||
|                             process_mode == TextureProcessMode::LL || |  | ||||||
|                             process_mode == TextureProcessMode::LLA; |  | ||||||
|     const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; |  | ||||||
| 
 |  | ||||||
|     Node bias; |  | ||||||
|     Node lod; |  | ||||||
|     switch (process_mode) { |  | ||||||
|     case TextureProcessMode::None: |  | ||||||
|         break; |  | ||||||
|     case TextureProcessMode::LZ: |  | ||||||
|         lod = Immediate(0.0f); |  | ||||||
|         break; |  | ||||||
|     case TextureProcessMode::LB: |  | ||||||
|         // If present, lod or bias are always stored in the register indexed by the gpr20 field with
 |  | ||||||
|         // an offset depending on the usage of the other registers.
 |  | ||||||
|         bias = GetRegister(instr.gpr20.Value() + bias_offset); |  | ||||||
|         break; |  | ||||||
|     case TextureProcessMode::LL: |  | ||||||
|         lod = GetRegister(instr.gpr20.Value() + bias_offset); |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node4 values; |  | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|         MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias, |  | ||||||
|                          lod,      {},    element,       index_var}; |  | ||||||
|         values[element] = Operation(opcode, meta, coords); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return values; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |  | ||||||
|                            TextureProcessMode process_mode, bool depth_compare, bool is_array, |  | ||||||
|                            bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { |  | ||||||
|     const bool lod_bias_enabled{ |  | ||||||
|         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; |  | ||||||
| 
 |  | ||||||
|     const bool is_bindless = bindless_reg.has_value(); |  | ||||||
| 
 |  | ||||||
|     u64 parameter_register = instr.gpr20.Value(); |  | ||||||
|     if (is_bindless) { |  | ||||||
|         ++parameter_register; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u32 bias_lod_offset = (is_bindless ? 1 : 0); |  | ||||||
|     if (lod_bias_enabled) { |  | ||||||
|         ++parameter_register; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, |  | ||||||
|                                                               lod_bias_enabled, 4, 5); |  | ||||||
|     const auto coord_count = std::get<0>(coord_counts); |  | ||||||
|     // If enabled arrays index is always stored in the gpr8 field
 |  | ||||||
|     const u64 array_register = instr.gpr8.Value(); |  | ||||||
|     // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
 |  | ||||||
|     const u64 coord_register = array_register + (is_array ? 1 : 0); |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> coords; |  | ||||||
|     for (std::size_t i = 0; i < coord_count; ++i) { |  | ||||||
|         coords.push_back(GetRegister(coord_register + i)); |  | ||||||
|     } |  | ||||||
|     // 1D.DC in OpenGL the 2nd component is ignored.
 |  | ||||||
|     if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { |  | ||||||
|         coords.push_back(Immediate(0.0f)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node array = is_array ? GetRegister(array_register) : nullptr; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> aoffi; |  | ||||||
|     if (is_aoffi) { |  | ||||||
|         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node dc; |  | ||||||
|     if (depth_compare) { |  | ||||||
|         // Depth is always stored in the register signaled by gpr20 or in the next register if lod
 |  | ||||||
|         // or bias are used
 |  | ||||||
|         dc = GetRegister(parameter_register++); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, |  | ||||||
|                           aoffi, bindless_reg); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |  | ||||||
|                             TextureProcessMode process_mode, bool depth_compare, bool is_array) { |  | ||||||
|     const bool lod_bias_enabled = |  | ||||||
|         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |  | ||||||
| 
 |  | ||||||
|     const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, |  | ||||||
|                                                               lod_bias_enabled, 4, 4); |  | ||||||
|     const auto coord_count = std::get<0>(coord_counts); |  | ||||||
| 
 |  | ||||||
|     // If enabled arrays index is always stored in the gpr8 field
 |  | ||||||
|     const u64 array_register = instr.gpr8.Value(); |  | ||||||
|     // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
 |  | ||||||
|     const u64 coord_register = array_register + (is_array ? 1 : 0); |  | ||||||
|     const u64 last_coord_register = |  | ||||||
|         (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) |  | ||||||
|             ? static_cast<u64>(instr.gpr20.Value()) |  | ||||||
|             : coord_register + 1; |  | ||||||
|     const u32 bias_offset = coord_count > 2 ? 1 : 0; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> coords; |  | ||||||
|     for (std::size_t i = 0; i < coord_count; ++i) { |  | ||||||
|         const bool last = (i == (coord_count - 1)) && (coord_count > 1); |  | ||||||
|         coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node array = is_array ? GetRegister(array_register) : nullptr; |  | ||||||
| 
 |  | ||||||
|     Node dc; |  | ||||||
|     if (depth_compare) { |  | ||||||
|         // Depth is always stored in the register signaled by gpr20 or in the next register if lod
 |  | ||||||
|         // or bias are used
 |  | ||||||
|         const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |  | ||||||
|         dc = GetRegister(depth_register); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, |  | ||||||
|                           {}); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |  | ||||||
|                             bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { |  | ||||||
|     ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); |  | ||||||
| 
 |  | ||||||
|     const std::size_t coord_count = GetCoordCount(texture_type); |  | ||||||
| 
 |  | ||||||
|     // If enabled arrays index is always stored in the gpr8 field
 |  | ||||||
|     const u64 array_register = instr.gpr8.Value(); |  | ||||||
|     // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
 |  | ||||||
|     const u64 coord_register = array_register + (is_array ? 1 : 0); |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> coords; |  | ||||||
|     for (std::size_t i = 0; i < coord_count; ++i) { |  | ||||||
|         coords.push_back(GetRegister(coord_register + i)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u64 parameter_register = instr.gpr20.Value(); |  | ||||||
| 
 |  | ||||||
|     SamplerInfo info; |  | ||||||
|     info.type = texture_type; |  | ||||||
|     info.is_array = is_array; |  | ||||||
|     info.is_shadow = depth_compare; |  | ||||||
| 
 |  | ||||||
|     Node index_var; |  | ||||||
|     const std::optional<SamplerEntry> sampler = |  | ||||||
|         is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) |  | ||||||
|                     : GetSampler(instr.sampler, info); |  | ||||||
|     Node4 values; |  | ||||||
|     if (!sampler) { |  | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|             values[element] = Immediate(0); |  | ||||||
|         } |  | ||||||
|         return values; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> aoffi, ptp; |  | ||||||
|     if (is_aoffi) { |  | ||||||
|         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); |  | ||||||
|     } else if (is_ptp) { |  | ||||||
|         ptp = GetPtpCoordinates( |  | ||||||
|             {GetRegister(parameter_register++), GetRegister(parameter_register++)}); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node dc; |  | ||||||
|     if (depth_compare) { |  | ||||||
|         dc = GetRegister(parameter_register++); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) |  | ||||||
|                                        : Immediate(static_cast<u32>(instr.tld4.component)); |  | ||||||
| 
 |  | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|         auto coords_copy = coords; |  | ||||||
|         MetaTexture meta{ |  | ||||||
|             *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, |  | ||||||
|             index_var}; |  | ||||||
|         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return values; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { |  | ||||||
|     const auto texture_type{instr.tld.texture_type}; |  | ||||||
|     const bool is_array{instr.tld.is_array != 0}; |  | ||||||
|     const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; |  | ||||||
|     const std::size_t coord_count{GetCoordCount(texture_type)}; |  | ||||||
| 
 |  | ||||||
|     u64 gpr8_cursor{instr.gpr8.Value()}; |  | ||||||
|     const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> coords; |  | ||||||
|     coords.reserve(coord_count); |  | ||||||
|     for (std::size_t i = 0; i < coord_count; ++i) { |  | ||||||
|         coords.push_back(GetRegister(gpr8_cursor++)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u64 gpr20_cursor{instr.gpr20.Value()}; |  | ||||||
|     // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
 |  | ||||||
|     const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; |  | ||||||
|     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
 |  | ||||||
|     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 |  | ||||||
| 
 |  | ||||||
|     const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); |  | ||||||
| 
 |  | ||||||
|     Node4 values; |  | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|         auto coords_copy = coords; |  | ||||||
|         MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; |  | ||||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return values; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |  | ||||||
|     SamplerInfo info; |  | ||||||
|     info.type = texture_type; |  | ||||||
|     info.is_array = is_array; |  | ||||||
|     info.is_shadow = false; |  | ||||||
|     const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); |  | ||||||
| 
 |  | ||||||
|     const std::size_t type_coord_count = GetCoordCount(texture_type); |  | ||||||
|     const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |  | ||||||
|     const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); |  | ||||||
| 
 |  | ||||||
|     // If enabled arrays index is always stored in the gpr8 field
 |  | ||||||
|     const u64 array_register = instr.gpr8.Value(); |  | ||||||
|     // if is array gpr20 is used
 |  | ||||||
|     const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); |  | ||||||
| 
 |  | ||||||
|     const u64 last_coord_register = |  | ||||||
|         ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array |  | ||||||
|             ? static_cast<u64>(instr.gpr20.Value()) |  | ||||||
|             : coord_register + 1; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> coords; |  | ||||||
|     for (std::size_t i = 0; i < type_coord_count; ++i) { |  | ||||||
|         const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |  | ||||||
|         coords.push_back( |  | ||||||
|             GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node array = is_array ? GetRegister(array_register) : nullptr; |  | ||||||
|     // When lod is used always is in gpr20
 |  | ||||||
|     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> aoffi; |  | ||||||
|     if (aoffi_enabled) { |  | ||||||
|         aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node4 values; |  | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |  | ||||||
|         auto coords_copy = coords; |  | ||||||
|         MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; |  | ||||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |  | ||||||
|     } |  | ||||||
|     return values; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( |  | ||||||
|     TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, |  | ||||||
|     std::size_t max_coords, std::size_t max_inputs) { |  | ||||||
|     const std::size_t coord_count = GetCoordCount(texture_type); |  | ||||||
| 
 |  | ||||||
|     std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); |  | ||||||
|     const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); |  | ||||||
|     if (total_coord_count > max_coords || total_reg_count > max_inputs) { |  | ||||||
|         UNIMPLEMENTED_MSG("Unsupported Texture operation"); |  | ||||||
|         total_coord_count = std::min(total_coord_count, max_coords); |  | ||||||
|     } |  | ||||||
|     // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
 |  | ||||||
|     total_coord_count += |  | ||||||
|         (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; |  | ||||||
| 
 |  | ||||||
|     return {coord_count, total_coord_count}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, |  | ||||||
|                                                 bool is_tld4) { |  | ||||||
|     const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; |  | ||||||
|     const u32 size = is_tld4 ? 6 : 4; |  | ||||||
|     const s32 wrap_value = is_tld4 ? 32 : 8; |  | ||||||
|     const s32 diff_value = is_tld4 ? 64 : 16; |  | ||||||
|     const u32 mask = (1U << size) - 1; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> aoffi; |  | ||||||
|     aoffi.reserve(coord_count); |  | ||||||
| 
 |  | ||||||
|     const auto aoffi_immediate{ |  | ||||||
|         TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; |  | ||||||
|     if (!aoffi_immediate) { |  | ||||||
|         // Variable access, not supported on AMD.
 |  | ||||||
|         LOG_WARNING(HW_GPU, |  | ||||||
|                     "AOFFI constant folding failed, some hardware might have graphical issues"); |  | ||||||
|         for (std::size_t coord = 0; coord < coord_count; ++coord) { |  | ||||||
|             const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); |  | ||||||
|             const Node condition = |  | ||||||
|                 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); |  | ||||||
|             const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); |  | ||||||
|             aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); |  | ||||||
|         } |  | ||||||
|         return aoffi; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     for (std::size_t coord = 0; coord < coord_count; ++coord) { |  | ||||||
|         s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; |  | ||||||
|         if (value >= wrap_value) { |  | ||||||
|             value -= diff_value; |  | ||||||
|         } |  | ||||||
|         aoffi.push_back(Immediate(value)); |  | ||||||
|     } |  | ||||||
|     return aoffi; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { |  | ||||||
|     static constexpr u32 num_entries = 8; |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> ptp; |  | ||||||
|     ptp.reserve(num_entries); |  | ||||||
| 
 |  | ||||||
|     const auto global_size = static_cast<s64>(global_code.size()); |  | ||||||
|     const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); |  | ||||||
|     const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); |  | ||||||
|     if (!low || !high) { |  | ||||||
|         for (u32 entry = 0; entry < num_entries; ++entry) { |  | ||||||
|             const u32 reg = entry / 4; |  | ||||||
|             const u32 offset = entry % 4; |  | ||||||
|             const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); |  | ||||||
|             const Node condition = |  | ||||||
|                 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); |  | ||||||
|             const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); |  | ||||||
|             ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); |  | ||||||
|         } |  | ||||||
|         return ptp; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); |  | ||||||
|     for (u32 entry = 0; entry < num_entries; ++entry) { |  | ||||||
|         s32 value = (immediate >> (entry * 8)) & 0b111111; |  | ||||||
|         if (value >= 32) { |  | ||||||
|             value -= 64; |  | ||||||
|         } |  | ||||||
|         ptp.push_back(Immediate(value)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return ptp; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,169 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using std::move; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| using Tegra::Shader::VideoType; |  | ||||||
| using Tegra::Shader::VmadShr; |  | ||||||
| using Tegra::Shader::VmnmxOperation; |  | ||||||
| using Tegra::Shader::VmnmxType; |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     if (opcode->get().GetId() == OpCode::Id::VMNMX) { |  | ||||||
|         DecodeVMNMX(bb, instr); |  | ||||||
|         return pc; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Node op_a = |  | ||||||
|         GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, |  | ||||||
|                         instr.video.type_a, instr.video.byte_height_a); |  | ||||||
|     const Node op_b = [this, instr] { |  | ||||||
|         if (instr.video.use_register_b) { |  | ||||||
|             return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, |  | ||||||
|                                    instr.video.signed_b, instr.video.type_b, |  | ||||||
|                                    instr.video.byte_height_b); |  | ||||||
|         } |  | ||||||
|         if (instr.video.signed_b) { |  | ||||||
|             const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); |  | ||||||
|             return Immediate(static_cast<u32>(imm)); |  | ||||||
|         } else { |  | ||||||
|             return Immediate(instr.alu.GetImm20_16()); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::VMAD: { |  | ||||||
|         const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; |  | ||||||
|         const Node op_c = GetRegister(instr.gpr39); |  | ||||||
| 
 |  | ||||||
|         Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); |  | ||||||
|         value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); |  | ||||||
| 
 |  | ||||||
|         if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { |  | ||||||
|             const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); |  | ||||||
|             value = |  | ||||||
|                 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |  | ||||||
|         SetRegister(bb, instr.gpr0, value); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::VSETP: { |  | ||||||
|         // We can't use the constant predicate as destination.
 |  | ||||||
|         ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); |  | ||||||
| 
 |  | ||||||
|         const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; |  | ||||||
|         const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); |  | ||||||
|         const Node second_pred = GetPredicate(instr.vsetp.pred39, false); |  | ||||||
| 
 |  | ||||||
|         const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); |  | ||||||
| 
 |  | ||||||
|         // Set the primary predicate to the result of Predicate OP SecondPredicate
 |  | ||||||
|         SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); |  | ||||||
| 
 |  | ||||||
|         if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { |  | ||||||
|             // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
 |  | ||||||
|             // if enabled
 |  | ||||||
|             const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); |  | ||||||
|             SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, |  | ||||||
|                                u64 byte_height) { |  | ||||||
|     if (!is_chunk) { |  | ||||||
|         return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     switch (type) { |  | ||||||
|     case VideoType::Size16_Low: |  | ||||||
|         return BitfieldExtract(op, 0, 16); |  | ||||||
|     case VideoType::Size16_High: |  | ||||||
|         return BitfieldExtract(op, 16, 16); |  | ||||||
|     case VideoType::Size32: |  | ||||||
|         // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
 |  | ||||||
|         // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
 |  | ||||||
|         UNIMPLEMENTED(); |  | ||||||
|         return Immediate(0); |  | ||||||
|     case VideoType::Invalid: |  | ||||||
|         UNREACHABLE_MSG("Invalid instruction encoding"); |  | ||||||
|         return Immediate(0); |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         return Immediate(0); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { |  | ||||||
|     UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); |  | ||||||
|     UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); |  | ||||||
|     UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); |  | ||||||
|     UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); |  | ||||||
|     UNIMPLEMENTED_IF(instr.vmnmx.sat); |  | ||||||
|     UNIMPLEMENTED_IF(instr.generates_cc); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
|     Node op_b = GetRegister(instr.gpr20); |  | ||||||
|     Node op_c = GetRegister(instr.gpr39); |  | ||||||
| 
 |  | ||||||
|     const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
 |  | ||||||
|     const bool is_oper2_signed = instr.vmnmx.is_dest_signed; |  | ||||||
| 
 |  | ||||||
|     const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; |  | ||||||
|     Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); |  | ||||||
| 
 |  | ||||||
|     switch (instr.vmnmx.operation) { |  | ||||||
|     case VmnmxOperation::Mrg_16H: |  | ||||||
|         value = BitfieldInsert(move(op_c), move(value), 16, 16); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Mrg_16L: |  | ||||||
|         value = BitfieldInsert(move(op_c), move(value), 0, 16); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Mrg_8B0: |  | ||||||
|         value = BitfieldInsert(move(op_c), move(value), 0, 8); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Mrg_8B2: |  | ||||||
|         value = BitfieldInsert(move(op_c), move(value), 16, 8); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Acc: |  | ||||||
|         value = Operation(OperationCode::IAdd, move(value), move(op_c)); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Min: |  | ||||||
|         value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Max: |  | ||||||
|         value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); |  | ||||||
|         break; |  | ||||||
|     case VmnmxOperation::Nop: |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE(); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     SetRegister(bb, instr.gpr0, move(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,117 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| using Tegra::Shader::ShuffleOperation; |  | ||||||
| using Tegra::Shader::VoteOperation; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| OperationCode GetOperationCode(VoteOperation vote_op) { |  | ||||||
|     switch (vote_op) { |  | ||||||
|     case VoteOperation::All: |  | ||||||
|         return OperationCode::VoteAll; |  | ||||||
|     case VoteOperation::Any: |  | ||||||
|         return OperationCode::VoteAny; |  | ||||||
|     case VoteOperation::Eq: |  | ||||||
|         return OperationCode::VoteEqual; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE_MSG("Invalid vote operation={}", vote_op); |  | ||||||
|         return OperationCode::VoteAll; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
|     // Signal the backend that this shader uses warp instructions.
 |  | ||||||
|     uses_warps = true; |  | ||||||
| 
 |  | ||||||
|     switch (opcode->get().GetId()) { |  | ||||||
|     case OpCode::Id::VOTE: { |  | ||||||
|         const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); |  | ||||||
|         const Node active = Operation(OperationCode::BallotThread, value); |  | ||||||
|         const Node vote = Operation(GetOperationCode(instr.vote.operation), value); |  | ||||||
|         SetRegister(bb, instr.gpr0, active); |  | ||||||
|         SetPredicate(bb, instr.vote.dest_pred, vote); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::SHFL: { |  | ||||||
|         Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |  | ||||||
|                                            : GetRegister(instr.gpr39); |  | ||||||
|         Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) |  | ||||||
|                                              : GetRegister(instr.gpr20); |  | ||||||
| 
 |  | ||||||
|         Node thread_id = Operation(OperationCode::ThreadId); |  | ||||||
|         Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); |  | ||||||
|         Node seg_mask = BitfieldExtract(mask, 8, 16); |  | ||||||
| 
 |  | ||||||
|         Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); |  | ||||||
|         Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); |  | ||||||
|         Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, |  | ||||||
|                                        Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); |  | ||||||
| 
 |  | ||||||
|         Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { |  | ||||||
|             switch (instr.shfl.operation) { |  | ||||||
|             case ShuffleOperation::Idx: |  | ||||||
|                 return Operation(OperationCode::IBitwiseOr, |  | ||||||
|                                  Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), |  | ||||||
|                                  min_thread_id); |  | ||||||
|             case ShuffleOperation::Down: |  | ||||||
|                 return Operation(OperationCode::IAdd, thread_id, index); |  | ||||||
|             case ShuffleOperation::Up: |  | ||||||
|                 return Operation(OperationCode::IAdd, thread_id, |  | ||||||
|                                  Operation(OperationCode::INegate, index)); |  | ||||||
|             case ShuffleOperation::Bfly: |  | ||||||
|                 return Operation(OperationCode::IBitwiseXor, thread_id, index); |  | ||||||
|             } |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Immediate(0U); |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { |  | ||||||
|             if (instr.shfl.operation == ShuffleOperation::Up) { |  | ||||||
|                 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); |  | ||||||
|             } else { |  | ||||||
|                 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); |  | ||||||
|             } |  | ||||||
|         }(); |  | ||||||
| 
 |  | ||||||
|         SetPredicate(bb, instr.shfl.pred48, in_bounds); |  | ||||||
|         SetRegister( |  | ||||||
|             bb, instr.gpr0, |  | ||||||
|             Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case OpCode::Id::FSWZADD: { |  | ||||||
|         UNIMPLEMENTED_IF(instr.fswzadd.ndv); |  | ||||||
| 
 |  | ||||||
|         Node op_a = GetRegister(instr.gpr8); |  | ||||||
|         Node op_b = GetRegister(instr.gpr20); |  | ||||||
|         Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); |  | ||||||
|         SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,156 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::OpCode; |  | ||||||
| using Tegra::Shader::PredCondition; |  | ||||||
| 
 |  | ||||||
/// Decodes the XMAD instruction stored at program_code[pc] into IR nodes.
///
/// The generated expression multiplies one 16-bit half of operand A by one 16-bit half of
/// operand B, optionally shifts the product left by 16, adds a mode-dependent form of operand C
/// and optionally merges the low half of the sum with the full operand B shifted left by 16.
/// The result is written to instr.gpr0.
///
/// @param bb Node block handed to SetTemporary, SetInternalFlagsFromInteger and SetRegister.
/// @param pc Index into program_code of the instruction being decoded.
/// @return pc, unchanged.
| u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { |  | ||||||
|     const Instruction instr = {program_code[pc]}; |  | ||||||
|     const auto opcode = OpCode::Decode(instr); |  | ||||||
| 
 |  | ||||||
      // Signed operands and condition code generation are reported as unimplemented.
|     UNIMPLEMENTED_IF(instr.xmad.sign_a); |  | ||||||
|     UNIMPLEMENTED_IF(instr.xmad.sign_b); |  | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.generates_cc, |  | ||||||
|                          "Condition codes generation in XMAD is not implemented"); |  | ||||||
| 
 |  | ||||||
|     Node op_a = GetRegister(instr.gpr8); |  | ||||||
| 
 |  | ||||||
|     // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
 |  | ||||||
|     UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); |  | ||||||
|     const bool is_signed_a = instr.xmad.sign_a == 1; |  | ||||||
|     const bool is_signed_b = instr.xmad.sign_b == 1; |  | ||||||
      // Operand C reuses the signedness of operand A.
|     const bool is_signed_c = is_signed_a; |  | ||||||
| 
 |  | ||||||
      // Pick the control bits and the B and C operands for the decoded encoding.
      // Tuple order: merge, product shift left, high half of B, C mode, operand B, operand C.
|     auto [is_merge, is_psl, is_high_b, mode, op_b_binding, |  | ||||||
|           op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { |  | ||||||
|         switch (opcode->get().GetId()) { |  | ||||||
|         case OpCode::Id::XMAD_CR: |  | ||||||
              // B comes from a constant buffer, C from gpr39.
|             return {instr.xmad.merge_56, |  | ||||||
|                     instr.xmad.product_shift_left_second, |  | ||||||
|                     instr.xmad.high_b, |  | ||||||
|                     instr.xmad.mode_cbf, |  | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |  | ||||||
|                     GetRegister(instr.gpr39)}; |  | ||||||
|         case OpCode::Id::XMAD_RR: |  | ||||||
              // B comes from gpr20, C from gpr39.
|             return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, |  | ||||||
|                     instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)}; |  | ||||||
|         case OpCode::Id::XMAD_RC: |  | ||||||
              // B comes from gpr39, C from a constant buffer; merge and shift are never applied.
|             return {false, |  | ||||||
|                     false, |  | ||||||
|                     instr.xmad.high_b, |  | ||||||
|                     instr.xmad.mode_cbf, |  | ||||||
|                     GetRegister(instr.gpr39), |  | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         case OpCode::Id::XMAD_IMM: |  | ||||||
              // B is the imm20_16 immediate and always uses its low half; C comes from gpr39.
|             return {instr.xmad.merge_37, |  | ||||||
|                     instr.xmad.product_shift_left, |  | ||||||
|                     false, |  | ||||||
|                     instr.xmad.mode, |  | ||||||
|                     Immediate(static_cast<u32>(instr.xmad.imm20_16)), |  | ||||||
|                     GetRegister(instr.gpr39)}; |  | ||||||
|         default: |  | ||||||
              // Unknown encoding: report it and fall back to neutral operands.
|             UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); |  | ||||||
|             return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
      // Keep only the selected 16-bit half of A: bits 16..31 when high_a is set, else bits 0..15.
|     op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), |  | ||||||
|                            instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); |  | ||||||
| 
 |  | ||||||
      // The full-width B is kept because the CBcc mode and the merge step use it unextracted.
|     const Node original_b = op_b_binding; |  | ||||||
|     const Node op_b = |  | ||||||
|         SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), |  | ||||||
|                         is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); |  | ||||||
| 
 |  | ||||||
|     // sign_a and sign_b were already checked to match above, so either sign can be used here.
 |  | ||||||
|     Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); |  | ||||||
|     if (is_psl) { |  | ||||||
|         product = |  | ||||||
|             SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); |  | ||||||
|     } |  | ||||||
      // Route the product through temporary 0 and continue with the temporary.
      // NOTE(review): presumably this avoids duplicating the expression in later nodes - confirm.
|     SetTemporary(bb, 0, product); |  | ||||||
|     product = GetTemporary(0); |  | ||||||
| 
 |  | ||||||
|     Node original_c = op_c; |  | ||||||
|     const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
 |  | ||||||
      // Transform operand C according to the selected mode.
|     op_c = [&] { |  | ||||||
|         switch (set_mode) { |  | ||||||
|         case Tegra::Shader::XmadMode::None: |  | ||||||
              // C is used as is.
|             return original_c; |  | ||||||
|         case Tegra::Shader::XmadMode::CLo: |  | ||||||
              // Bits 0..15 of C.
|             return BitfieldExtract(std::move(original_c), 0, 16); |  | ||||||
|         case Tegra::Shader::XmadMode::CHi: |  | ||||||
              // Bits 16..31 of C.
|             return BitfieldExtract(std::move(original_c), 16, 16); |  | ||||||
|         case Tegra::Shader::XmadMode::CBcc: { |  | ||||||
              // C plus the full-width B shifted left by 16.
|             Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, |  | ||||||
|                                              original_b, Immediate(16)); |  | ||||||
|             return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), |  | ||||||
|                                    std::move(shifted_b)); |  | ||||||
|         } |  | ||||||
|         case Tegra::Shader::XmadMode::CSfu: { |  | ||||||
              // comp is true when either extracted 16-bit operand equals zero.
|             const Node comp_a = |  | ||||||
|                 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); |  | ||||||
|             const Node comp_b = |  | ||||||
|                 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); |  | ||||||
|             const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); |  | ||||||
| 
 |  | ||||||
              // comp_minus_a and comp_minus_b test bit 31 of the extracted operands.
|             const Node comp_minus_a = GetPredicateComparisonInteger( |  | ||||||
|                 PredCondition::NE, is_signed_a, |  | ||||||
|                 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, |  | ||||||
|                                 Immediate(0x80000000)), |  | ||||||
|                 Immediate(0)); |  | ||||||
|             const Node comp_minus_b = GetPredicateComparisonInteger( |  | ||||||
|                 PredCondition::NE, is_signed_b, |  | ||||||
|                 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, |  | ||||||
|                                 Immediate(0x80000000)), |  | ||||||
|                 Immediate(0)); |  | ||||||
| 
 |  | ||||||
              // Add -65536 to C once for each operand that has bit 31 set.
|             Node new_c = Operation( |  | ||||||
|                 OperationCode::Select, comp_minus_a, |  | ||||||
|                 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), |  | ||||||
|                 original_c); |  | ||||||
|             new_c = Operation( |  | ||||||
|                 OperationCode::Select, comp_minus_b, |  | ||||||
|                 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), |  | ||||||
|                 std::move(new_c)); |  | ||||||
| 
 |  | ||||||
              // When either operand is zero the untouched C is selected instead.
|             return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             UNREACHABLE(); |  | ||||||
|             return Immediate(0); |  | ||||||
|         } |  | ||||||
|     }(); |  | ||||||
| 
 |  | ||||||
      // Route the transformed C through temporary 1.
|     SetTemporary(bb, 1, op_c); |  | ||||||
|     op_c = GetTemporary(1); |  | ||||||
| 
 |  | ||||||
|     // TODO(Rodrigo): Use an appropriate sign for this operation
 |  | ||||||
|     Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); |  | ||||||
|     SetTemporary(bb, 2, sum); |  | ||||||
|     sum = GetTemporary(2); |  | ||||||
|     if (is_merge) { |  | ||||||
          // Merge: bits 0..15 of the sum combined with the full-width B shifted left by 16.
|         const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), |  | ||||||
|                                        Immediate(0), Immediate(16)); |  | ||||||
|         const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, |  | ||||||
|                                        Immediate(16)); |  | ||||||
|         sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
      // Update the internal flags from the result and store it in the destination register.
|     SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); |  | ||||||
|     SetRegister(bb, instr.gpr0, std::move(sum)); |  | ||||||
| 
 |  | ||||||
|     return pc; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,93 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <memory> |  | ||||||
| #include <variant> |  | ||||||
| 
 |  | ||||||
| #include "video_core/shader/expr.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| namespace { |  | ||||||
| bool ExprIsBoolean(const Expr& expr) { |  | ||||||
|     return std::holds_alternative<ExprBoolean>(*expr); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprBooleanGet(const Expr& expr) { |  | ||||||
|     return std::get_if<ExprBoolean>(expr.get())->value; |  | ||||||
| } |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| bool ExprAnd::operator==(const ExprAnd& b) const { |  | ||||||
|     return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprAnd::operator!=(const ExprAnd& b) const { |  | ||||||
|     return !operator==(b); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprOr::operator==(const ExprOr& b) const { |  | ||||||
|     return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprOr::operator!=(const ExprOr& b) const { |  | ||||||
|     return !operator==(b); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprNot::operator==(const ExprNot& b) const { |  | ||||||
|     return *operand1 == *b.operand1; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprNot::operator!=(const ExprNot& b) const { |  | ||||||
|     return !operator==(b); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Expr MakeExprNot(Expr first) { |  | ||||||
|     if (std::holds_alternative<ExprNot>(*first)) { |  | ||||||
|         return std::get_if<ExprNot>(first.get())->operand1; |  | ||||||
|     } |  | ||||||
|     return MakeExpr<ExprNot>(std::move(first)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Expr MakeExprAnd(Expr first, Expr second) { |  | ||||||
|     if (ExprIsBoolean(first)) { |  | ||||||
|         return ExprBooleanGet(first) ? second : first; |  | ||||||
|     } |  | ||||||
|     if (ExprIsBoolean(second)) { |  | ||||||
|         return ExprBooleanGet(second) ? first : second; |  | ||||||
|     } |  | ||||||
|     return MakeExpr<ExprAnd>(std::move(first), std::move(second)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Expr MakeExprOr(Expr first, Expr second) { |  | ||||||
|     if (ExprIsBoolean(first)) { |  | ||||||
|         return ExprBooleanGet(first) ? first : second; |  | ||||||
|     } |  | ||||||
|     if (ExprIsBoolean(second)) { |  | ||||||
|         return ExprBooleanGet(second) ? second : first; |  | ||||||
|     } |  | ||||||
|     return MakeExpr<ExprOr>(std::move(first), std::move(second)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprAreEqual(const Expr& first, const Expr& second) { |  | ||||||
|     return (*first) == (*second); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprAreOpposite(const Expr& first, const Expr& second) { |  | ||||||
|     if (std::holds_alternative<ExprNot>(*first)) { |  | ||||||
|         return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); |  | ||||||
|     } |  | ||||||
|     if (std::holds_alternative<ExprNot>(*second)) { |  | ||||||
|         return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprIsTrue(const Expr& first) { |  | ||||||
|     if (ExprIsBoolean(first)) { |  | ||||||
|         return ExprBooleanGet(first); |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,156 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <memory> |  | ||||||
| #include <variant> |  | ||||||
| 
 |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::ConditionCode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| 
 |  | ||||||
| class ExprAnd; |  | ||||||
| class ExprBoolean; |  | ||||||
| class ExprCondCode; |  | ||||||
| class ExprGprEqual; |  | ||||||
| class ExprNot; |  | ||||||
| class ExprOr; |  | ||||||
| class ExprPredicate; |  | ||||||
| class ExprVar; |  | ||||||
| 
 |  | ||||||
| using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, |  | ||||||
|                               ExprBoolean, ExprGprEqual>; |  | ||||||
| using Expr = std::shared_ptr<ExprData>; |  | ||||||
| 
 |  | ||||||
| class ExprAnd final { |  | ||||||
| public: |  | ||||||
|     explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprAnd& b) const; |  | ||||||
|     bool operator!=(const ExprAnd& b) const; |  | ||||||
| 
 |  | ||||||
|     Expr operand1; |  | ||||||
|     Expr operand2; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ExprOr final { |  | ||||||
| public: |  | ||||||
|     explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprOr& b) const; |  | ||||||
|     bool operator!=(const ExprOr& b) const; |  | ||||||
| 
 |  | ||||||
|     Expr operand1; |  | ||||||
|     Expr operand2; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ExprNot final { |  | ||||||
| public: |  | ||||||
|     explicit ExprNot(Expr a) : operand1{std::move(a)} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprNot& b) const; |  | ||||||
|     bool operator!=(const ExprNot& b) const; |  | ||||||
| 
 |  | ||||||
|     Expr operand1; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ExprVar final { |  | ||||||
| public: |  | ||||||
|     explicit ExprVar(u32 index) : var_index{index} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprVar& b) const { |  | ||||||
|         return var_index == b.var_index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const ExprVar& b) const { |  | ||||||
|         return !operator==(b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 var_index; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ExprPredicate final { |  | ||||||
| public: |  | ||||||
|     explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprPredicate& b) const { |  | ||||||
|         return predicate == b.predicate; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const ExprPredicate& b) const { |  | ||||||
|         return !operator==(b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 predicate; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| class ExprCondCode final { |  | ||||||
| public: |  | ||||||
|     explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprCondCode& b) const { |  | ||||||
|         return cc == b.cc; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const ExprCondCode& b) const { |  | ||||||
|         return !operator==(b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ConditionCode cc; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
/// Expression node that carries a constant boolean value.
class ExprBoolean final {
public:
    explicit ExprBoolean(bool val) : value{val} {}

    /// Nodes are equal when they carry the same boolean value.
    bool operator==(const ExprBoolean& rhs) const {
        return value == rhs.value;
    }

    bool operator!=(const ExprBoolean& rhs) const {
        return !(*this == rhs);
    }

    bool value; ///< Stored constant
};
| 
 |  | ||||||
| class ExprGprEqual final { |  | ||||||
| public: |  | ||||||
|     explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} |  | ||||||
| 
 |  | ||||||
|     bool operator==(const ExprGprEqual& b) const { |  | ||||||
|         return gpr == b.gpr && value == b.value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool operator!=(const ExprGprEqual& b) const { |  | ||||||
|         return !operator==(b); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 gpr; |  | ||||||
|     u32 value; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <typename T, typename... Args> |  | ||||||
| Expr MakeExpr(Args&&... args) { |  | ||||||
|     static_assert(std::is_convertible_v<T, ExprData>); |  | ||||||
|     return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool ExprAreEqual(const Expr& first, const Expr& second); |  | ||||||
| 
 |  | ||||||
| bool ExprAreOpposite(const Expr& first, const Expr& second); |  | ||||||
| 
 |  | ||||||
| Expr MakeExprNot(Expr first); |  | ||||||
| 
 |  | ||||||
| Expr MakeExprAnd(Expr first, Expr second); |  | ||||||
| 
 |  | ||||||
| Expr MakeExprOr(Expr first, Expr second); |  | ||||||
| 
 |  | ||||||
| bool ExprIsTrue(const Expr& first); |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,76 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <cstddef> |  | ||||||
| 
 |  | ||||||
| #include <boost/container_hash/hash.hpp> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "core/core.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/memory_manager.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, |  | ||||||
|                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { |  | ||||||
|     const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; |  | ||||||
|     return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
/// Tells whether the instruction at offset is a scheduler instruction.
/// Counting from main_offset, every fourth instruction slot (the first of each group of four)
/// is a scheduler instruction.
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    constexpr std::size_t sched_period = 4;
    const std::size_t relative_offset = offset - main_offset;
    const std::size_t slot_in_group = relative_offset % sched_period;
    return slot_in_group == 0;
}
| 
 |  | ||||||
| std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { |  | ||||||
|     // This is the encoded version of BRA that jumps to itself. All Nvidia
 |  | ||||||
|     // shaders end with one.
 |  | ||||||
|     static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; |  | ||||||
|     static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; |  | ||||||
| 
 |  | ||||||
|     const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |  | ||||||
|     std::size_t offset = start_offset; |  | ||||||
|     while (offset < program.size()) { |  | ||||||
|         const u64 instruction = program[offset]; |  | ||||||
|         if (!IsSchedInstruction(offset, start_offset)) { |  | ||||||
|             if ((instruction & MASK) == SELF_JUMPING_BRANCH) { |  | ||||||
|                 // End on Maxwell's "nop" instruction
 |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             if (instruction == 0) { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         ++offset; |  | ||||||
|     } |  | ||||||
|     // The last instruction is included in the program size
 |  | ||||||
|     return std::min(offset + 1, program.size()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, |  | ||||||
|                           const u8* host_ptr, bool is_compute) { |  | ||||||
|     ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |  | ||||||
|     ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); |  | ||||||
|     memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); |  | ||||||
|     code.resize(CalculateProgramSize(code, is_compute)); |  | ||||||
|     return code; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, |  | ||||||
|                         const ProgramCode& code_b) { |  | ||||||
|     size_t unique_identifier = boost::hash_value(code); |  | ||||||
|     if (is_a) { |  | ||||||
|         // VertexA programs include two programs
 |  | ||||||
|         boost::hash_combine(unique_identifier, boost::hash_value(code_b)); |  | ||||||
|     } |  | ||||||
|     return static_cast<u64>(unique_identifier); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,43 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <cstddef> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| 
 |  | ||||||
| namespace Tegra { |  | ||||||
| class MemoryManager; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using ProgramCode = std::vector<u64>; |  | ||||||
| 
 |  | ||||||
| constexpr u32 STAGE_MAIN_OFFSET = 10; |  | ||||||
| constexpr u32 KERNEL_MAIN_OFFSET = 0; |  | ||||||
| 
 |  | ||||||
| /// Gets the address for the specified shader stage program
 |  | ||||||
| GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, |  | ||||||
|                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); |  | ||||||
| 
 |  | ||||||
| /// Gets if the current instruction offset is a scheduler instruction
 |  | ||||||
| bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); |  | ||||||
| 
 |  | ||||||
| /// Calculates the size of a program stream
 |  | ||||||
| std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); |  | ||||||
| 
 |  | ||||||
| /// Gets the shader program code from memory for the specified address
 |  | ||||||
| ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, |  | ||||||
|                           const u8* host_ptr, bool is_compute); |  | ||||||
| 
 |  | ||||||
| /// Hashes one (or two) program streams
 |  | ||||||
| u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, |  | ||||||
|                         const ProgramCode& code_b = {}); |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,701 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <cstddef> |  | ||||||
| #include <memory> |  | ||||||
| #include <optional> |  | ||||||
| #include <string> |  | ||||||
| #include <tuple> |  | ||||||
| #include <utility> |  | ||||||
| #include <variant> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| enum class OperationCode { |  | ||||||
|     Assign, /// (float& dest, float src) -> void
 |  | ||||||
| 
 |  | ||||||
|     Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
 |  | ||||||
| 
 |  | ||||||
|     FAdd,          /// (MetaArithmetic, float a, float b) -> float
 |  | ||||||
|     FMul,          /// (MetaArithmetic, float a, float b) -> float
 |  | ||||||
|     FDiv,          /// (MetaArithmetic, float a, float b) -> float
 |  | ||||||
|     FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
 |  | ||||||
|     FNegate,       /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FAbsolute,     /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
 |  | ||||||
|     FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
 |  | ||||||
|     FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
 |  | ||||||
|     FMin,          /// (MetaArithmetic, float a, float b) -> float
 |  | ||||||
|     FMax,          /// (MetaArithmetic, float a, float b) -> float
 |  | ||||||
|     FCos,          /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FSin,          /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FExp2,         /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FLog2,         /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FInverseSqrt,  /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FSqrt,         /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FRoundEven,    /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FFloor,        /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FCeil,         /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FTrunc,        /// (MetaArithmetic, float a) -> float
 |  | ||||||
|     FCastInteger,  /// (MetaArithmetic, int a) -> float
 |  | ||||||
|     FCastUInteger, /// (MetaArithmetic, uint a) -> float
 |  | ||||||
|     FSwizzleAdd,   /// (float a, float b, uint mask) -> float
 |  | ||||||
| 
 |  | ||||||
|     IAdd,                  /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IMul,                  /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IDiv,                  /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     INegate,               /// (MetaArithmetic, int a) -> int
 |  | ||||||
|     IAbsolute,             /// (MetaArithmetic, int a) -> int
 |  | ||||||
|     IMin,                  /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IMax,                  /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     ICastFloat,            /// (MetaArithmetic, float a) -> int
 |  | ||||||
|     ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
 |  | ||||||
|     ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
 |  | ||||||
|     ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
 |  | ||||||
|     IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
 |  | ||||||
|     IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
 |  | ||||||
|     IBitwiseNot,           /// (MetaArithmetic, int a) -> int
 |  | ||||||
|     IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
 |  | ||||||
|     IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int offset) -> int
 |  | ||||||
|     IBitCount,             /// (MetaArithmetic, int) -> int
 |  | ||||||
|     IBitMSB,               /// (MetaArithmetic, int) -> int
 |  | ||||||
| 
 |  | ||||||
|     UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UCastFloat,            /// (MetaArithmetic, float a) -> uint
 |  | ||||||
|     UCastSigned,           /// (MetaArithmetic, int a) -> uint
 |  | ||||||
|     ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
 |  | ||||||
|     UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
 |  | ||||||
|     UBitfieldInsert,  /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
 |  | ||||||
|     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
 |  | ||||||
|     UBitCount,        /// (MetaArithmetic, uint) -> uint
 |  | ||||||
|     UBitMSB,          /// (MetaArithmetic, uint) -> uint
 |  | ||||||
| 
 |  | ||||||
|     HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 |  | ||||||
|     HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 |  | ||||||
|     HFma,       /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
 |  | ||||||
|     HAbsolute,  /// (f16vec2 a) -> f16vec2
 |  | ||||||
|     HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
 |  | ||||||
|     HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
 |  | ||||||
|     HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
 |  | ||||||
|     HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
 |  | ||||||
|     HMergeF32,  /// (f16vec2 src) -> float
 |  | ||||||
|     HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
 |  | ||||||
|     HMergeH1,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
 |  | ||||||
|     HPack2,     /// (float a, float b) -> f16vec2
 |  | ||||||
| 
 |  | ||||||
|     LogicalAssign, /// (bool& dst, bool src) -> void
 |  | ||||||
|     LogicalAnd,    /// (bool a, bool b) -> bool
 |  | ||||||
|     LogicalOr,     /// (bool a, bool b) -> bool
 |  | ||||||
|     LogicalXor,    /// (bool a, bool b) -> bool
 |  | ||||||
|     LogicalNegate, /// (bool a) -> bool
 |  | ||||||
|     LogicalPick2,  /// (bool2 pair, uint index) -> bool
 |  | ||||||
|     LogicalAnd2,   /// (bool2 a) -> bool
 |  | ||||||
| 
 |  | ||||||
|     LogicalFOrdLessThan,       /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdEqual,          /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdLessEqual,      /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdGreaterThan,    /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdNotEqual,       /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdGreaterEqual,   /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFOrdered,           /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordered,         /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordLessThan,     /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordEqual,        /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordLessEqual,    /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordGreaterThan,  /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordNotEqual,     /// (float a, float b) -> bool
 |  | ||||||
|     LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
 |  | ||||||
| 
 |  | ||||||
|     LogicalILessThan,     /// (int a, int b) -> bool
 |  | ||||||
|     LogicalIEqual,        /// (int a, int b) -> bool
 |  | ||||||
|     LogicalILessEqual,    /// (int a, int b) -> bool
 |  | ||||||
|     LogicalIGreaterThan,  /// (int a, int b) -> bool
 |  | ||||||
|     LogicalINotEqual,     /// (int a, int b) -> bool
 |  | ||||||
|     LogicalIGreaterEqual, /// (int a, int b) -> bool
 |  | ||||||
| 
 |  | ||||||
|     LogicalULessThan,     /// (uint a, uint b) -> bool
 |  | ||||||
|     LogicalUEqual,        /// (uint a, uint b) -> bool
 |  | ||||||
|     LogicalULessEqual,    /// (uint a, uint b) -> bool
 |  | ||||||
|     LogicalUGreaterThan,  /// (uint a, uint b) -> bool
 |  | ||||||
|     LogicalUNotEqual,     /// (uint a, uint b) -> bool
 |  | ||||||
|     LogicalUGreaterEqual, /// (uint a, uint b) -> bool
 |  | ||||||
| 
 |  | ||||||
|     LogicalAddCarry, /// (uint a, uint b) -> bool
 |  | ||||||
| 
 |  | ||||||
|     Logical2HLessThan,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HEqual,               /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HLessEqual,           /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HGreaterThan,         /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HNotEqual,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HGreaterEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HLessThanWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HEqualWithNan,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HLessEqualWithNan,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HGreaterThanWithNan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HNotEqualWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
|     Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
 |  | ||||||
| 
 |  | ||||||
|     Texture,                /// (MetaTexture, float[N] coords) -> float4
 |  | ||||||
|     TextureLod,             /// (MetaTexture, float[N] coords) -> float4
 |  | ||||||
|     TextureGather,          /// (MetaTexture, float[N] coords) -> float4
 |  | ||||||
|     TextureQueryDimensions, /// (MetaTexture, float a) -> float4
 |  | ||||||
|     TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
 |  | ||||||
|     TexelFetch,             /// (MetaTexture, int[N], int) -> float4
 |  | ||||||
|     TextureGradient,        /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
 |  | ||||||
| 
 |  | ||||||
|     ImageLoad,  /// (MetaImage, int[N] coords) -> void
 |  | ||||||
|     ImageStore, /// (MetaImage, int[N] coords) -> void
 |  | ||||||
| 
 |  | ||||||
|     AtomicImageAdd,      /// (MetaImage, int[N] coords) -> void
 |  | ||||||
|     AtomicImageAnd,      /// (MetaImage, int[N] coords) -> void
 |  | ||||||
|     AtomicImageOr,       /// (MetaImage, int[N] coords) -> void
 |  | ||||||
|     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
 |  | ||||||
|     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 |  | ||||||
| 
 |  | ||||||
|     AtomicUExchange, /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUAdd,      /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUMin,      /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUMax,      /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUAnd,      /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUOr,       /// (memory, uint) -> uint
 |  | ||||||
|     AtomicUXor,      /// (memory, uint) -> uint
 |  | ||||||
| 
 |  | ||||||
|     AtomicIExchange, /// (memory, int) -> int
 |  | ||||||
|     AtomicIAdd,      /// (memory, int) -> int
 |  | ||||||
|     AtomicIMin,      /// (memory, int) -> int
 |  | ||||||
|     AtomicIMax,      /// (memory, int) -> int
 |  | ||||||
|     AtomicIAnd,      /// (memory, int) -> int
 |  | ||||||
|     AtomicIOr,       /// (memory, int) -> int
 |  | ||||||
|     AtomicIXor,      /// (memory, int) -> int
 |  | ||||||
| 
 |  | ||||||
|     ReduceUAdd, /// (memory, uint) -> void
 |  | ||||||
|     ReduceUMin, /// (memory, uint) -> void
 |  | ||||||
|     ReduceUMax, /// (memory, uint) -> void
 |  | ||||||
|     ReduceUAnd, /// (memory, uint) -> void
 |  | ||||||
|     ReduceUOr,  /// (memory, uint) -> void
 |  | ||||||
|     ReduceUXor, /// (memory, uint) -> void
 |  | ||||||
| 
 |  | ||||||
|     ReduceIAdd, /// (memory, int) -> void
 |  | ||||||
|     ReduceIMin, /// (memory, int) -> void
 |  | ||||||
|     ReduceIMax, /// (memory, int) -> void
 |  | ||||||
|     ReduceIAnd, /// (memory, int) -> void
 |  | ||||||
|     ReduceIOr,  /// (memory, int) -> void
 |  | ||||||
|     ReduceIXor, /// (memory, int) -> void
 |  | ||||||
| 
 |  | ||||||
|     Branch,         /// (uint branch_target) -> void
 |  | ||||||
|     BranchIndirect, /// (uint branch_target) -> void
 |  | ||||||
|     PushFlowStack,  /// (uint branch_target) -> void
 |  | ||||||
|     PopFlowStack,   /// () -> void
 |  | ||||||
|     Exit,           /// () -> void
 |  | ||||||
|     Discard,        /// () -> void
 |  | ||||||
| 
 |  | ||||||
|     EmitVertex,   /// () -> void
 |  | ||||||
|     EndPrimitive, /// () -> void
 |  | ||||||
| 
 |  | ||||||
|     InvocationId,       /// () -> int
 |  | ||||||
|     YNegate,            /// () -> float
 |  | ||||||
|     LocalInvocationIdX, /// () -> uint
 |  | ||||||
|     LocalInvocationIdY, /// () -> uint
 |  | ||||||
|     LocalInvocationIdZ, /// () -> uint
 |  | ||||||
|     WorkGroupIdX,       /// () -> uint
 |  | ||||||
|     WorkGroupIdY,       /// () -> uint
 |  | ||||||
|     WorkGroupIdZ,       /// () -> uint
 |  | ||||||
| 
 |  | ||||||
|     BallotThread, /// (bool) -> uint
 |  | ||||||
|     VoteAll,      /// (bool) -> bool
 |  | ||||||
|     VoteAny,      /// (bool) -> bool
 |  | ||||||
|     VoteEqual,    /// (bool) -> bool
 |  | ||||||
| 
 |  | ||||||
|     ThreadId,       /// () -> uint
 |  | ||||||
|     ThreadEqMask,   /// () -> uint
 |  | ||||||
|     ThreadGeMask,   /// () -> uint
 |  | ||||||
|     ThreadGtMask,   /// () -> uint
 |  | ||||||
|     ThreadLeMask,   /// () -> uint
 |  | ||||||
|     ThreadLtMask,   /// () -> uint
 |  | ||||||
|     ShuffleIndexed, /// (uint value, uint index) -> uint
 |  | ||||||
| 
 |  | ||||||
|     Barrier,             /// () -> void
 |  | ||||||
|     MemoryBarrierGroup,  /// () -> void
 |  | ||||||
|     MemoryBarrierGlobal, /// () -> void
 |  | ||||||
| 
 |  | ||||||
|     Amount, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
/// Identifiers of the internal flags that an InternalFlagNode can refer to.
enum class InternalFlag {
    Zero = 0,
    Sign,     // 1
    Carry,    // 2
    Overflow, // 3
    Amount,   // 4: number of flags listed above, not a flag itself
};
| 
 |  | ||||||
/// Tag naming a flow stack class; it is one of the alternatives an operation's Meta can hold.
enum class MetaStackClass {
    Ssy, // first enumerator (value 0)
    Pbk, // second enumerator (value 1)
};
| 
 |  | ||||||
// Forward declarations of every node kind that NodeData can store, listed in the same order
// as the variant alternatives below.
class OperationNode;
class ConditionalNode;
class GprNode;
class CustomVarNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode;
class PatchNode;
class CbufNode;
class LmemNode;
class SmemNode;
class GmemNode;
class CommentNode;

/// Tagged union over all node kinds. The alternative order determines variant indices, so it
/// must not be changed casually.
using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
                              LmemNode, SmemNode, GmemNode, CommentNode>;
/// Shared-ownership handle to a node.
using Node = std::shared_ptr<NodeData>;
/// Fixed group of four node handles.
using Node4 = std::array<Node, 4>;
/// Ordered sequence of node handles.
using NodeBlock = std::vector<Node>;

// Forward declarations of the tracked sampler descriptions.
struct ArraySamplerNode;
struct BindlessSamplerNode;
struct SeparateSamplerNode;

/// Tagged union over the tracked sampler descriptions.
using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
/// Shared-ownership handle to a tracked sampler description.
using TrackSampler = std::shared_ptr<TrackSamplerData>;
| 
 |  | ||||||
| struct SamplerEntry { |  | ||||||
|     /// Bound samplers constructor
 |  | ||||||
|     explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, |  | ||||||
|                           bool is_shadow_, bool is_buffer_, bool is_indexed_) |  | ||||||
|         : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, |  | ||||||
|           is_buffer{is_buffer_}, is_indexed{is_indexed_} {} |  | ||||||
| 
 |  | ||||||
|     /// Separate sampler constructor
 |  | ||||||
|     explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, |  | ||||||
|                           Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, |  | ||||||
|                           bool is_buffer_) |  | ||||||
|         : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, |  | ||||||
|           buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, |  | ||||||
|           is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} |  | ||||||
| 
 |  | ||||||
|     /// Bindless samplers constructor
 |  | ||||||
|     explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, |  | ||||||
|                           bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) |  | ||||||
|         : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, |  | ||||||
|           is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 index = 0;            ///< Emulated index given for the this sampler.
 |  | ||||||
|     u32 offset = 0;           ///< Offset in the const buffer from where the sampler is being read.
 |  | ||||||
|     u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
 |  | ||||||
|     u32 buffer = 0;           ///< Buffer where the bindless sampler is read.
 |  | ||||||
|     u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
 |  | ||||||
|     u32 size = 1;             ///< Size of the sampler.
 |  | ||||||
| 
 |  | ||||||
|     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
 |  | ||||||
|     bool is_array = false;     ///< Whether the texture is being sampled as an array texture or not.
 |  | ||||||
|     bool is_shadow = false;    ///< Whether the texture is being sampled as a depth texture or not.
 |  | ||||||
|     bool is_buffer = false;    ///< Whether the texture is a texture buffer without sampler.
 |  | ||||||
|     bool is_bindless = false;  ///< Whether this sampler belongs to a bindless texture or not.
 |  | ||||||
|     bool is_indexed = false;   ///< Whether this sampler is an indexed array of textures.
 |  | ||||||
|     bool is_separated = false; ///< Whether the image and sampler is separated or not.
 |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Represents a tracked bindless sampler into a direct const buffer
 |  | ||||||
| struct ArraySamplerNode { |  | ||||||
|     u32 index; |  | ||||||
|     u32 base_offset; |  | ||||||
|     u32 bindless_var; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Represents a tracked separate sampler image pair that was folded statically
 |  | ||||||
| struct SeparateSamplerNode { |  | ||||||
|     std::pair<u32, u32> indices; |  | ||||||
|     std::pair<u32, u32> offsets; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Represents a tracked bindless sampler into a direct const buffer
 |  | ||||||
| struct BindlessSamplerNode { |  | ||||||
|     u32 index; |  | ||||||
|     u32 offset; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct ImageEntry { |  | ||||||
| public: |  | ||||||
|     /// Bound images constructor
 |  | ||||||
|     explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) |  | ||||||
|         : index{index_}, offset{offset_}, type{type_} {} |  | ||||||
| 
 |  | ||||||
|     /// Bindless samplers constructor
 |  | ||||||
|     explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) |  | ||||||
|         : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} |  | ||||||
| 
 |  | ||||||
|     void MarkWrite() { |  | ||||||
|         is_written = true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void MarkRead() { |  | ||||||
|         is_read = true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void MarkAtomic() { |  | ||||||
|         MarkWrite(); |  | ||||||
|         MarkRead(); |  | ||||||
|         is_atomic = true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 index = 0; |  | ||||||
|     u32 offset = 0; |  | ||||||
|     u32 buffer = 0; |  | ||||||
| 
 |  | ||||||
|     Tegra::Shader::ImageType type{}; |  | ||||||
|     bool is_bindless = false; |  | ||||||
|     bool is_written = false; |  | ||||||
|     bool is_read = false; |  | ||||||
|     bool is_atomic = false; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct GlobalMemoryBase { |  | ||||||
|     u32 cbuf_index = 0; |  | ||||||
|     u32 cbuf_offset = 0; |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { |  | ||||||
|         return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
/// Parameters describing an arithmetic operation
struct MetaArithmetic {
    /// Precision flag, false by default.
    /// NOTE(review): the original comment read "whether the operation can be constraint or
    /// not"; confirm the exact meaning with the consumers of this flag.
    bool precise = false;
};
| 
 |  | ||||||
| /// Parameters describing a texture sampler
 |  | ||||||
| struct MetaTexture { |  | ||||||
|     SamplerEntry sampler; |  | ||||||
|     Node array; |  | ||||||
|     Node depth_compare; |  | ||||||
|     std::vector<Node> aoffi; |  | ||||||
|     std::vector<Node> ptp; |  | ||||||
|     std::vector<Node> derivates; |  | ||||||
|     Node bias; |  | ||||||
|     Node lod; |  | ||||||
|     Node component; |  | ||||||
|     u32 element{}; |  | ||||||
|     Node index; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| struct MetaImage { |  | ||||||
|     const ImageEntry& image; |  | ||||||
|     std::vector<Node> values; |  | ||||||
|     u32 element{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Parameters that modify an operation but are not part of any particular operand
 |  | ||||||
| using Meta = |  | ||||||
|     std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; |  | ||||||
| 
 |  | ||||||
/// Base class that lets a node carry an optional "amend" index.
/// The index is stored in a single std::size_t; the maximum std::size_t value is reserved
/// as the "no amend" marker, so that value cannot be used as a real index.
class AmendNode {
public:
    /// Returns the stored amend index, or std::nullopt when none is set.
    [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
        if (amend_index == amend_null_index) {
            return std::nullopt;
        }
        return amend_index;
    }

    /// Stores @p index as the amend index.
    void SetAmendIndex(std::size_t index) {
        amend_index = index;
    }

    /// Removes any stored amend index.
    void ClearAmend() {
        amend_index = amend_null_index;
    }

private:
    // All-ones std::size_t. Derived from the type instead of a 64-bit literal so there is no
    // implicit truncation where std::size_t is narrower than 64 bits.
    static constexpr std::size_t amend_null_index = static_cast<std::size_t>(-1);
    std::size_t amend_index{amend_null_index};
};
| 
 |  | ||||||
| /// Holds any kind of operation that can be done in the IR
 |  | ||||||
| class OperationNode final : public AmendNode { |  | ||||||
| public: |  | ||||||
|     explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} |  | ||||||
| 
 |  | ||||||
|     explicit OperationNode(OperationCode code_, Meta meta_) |  | ||||||
|         : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} |  | ||||||
| 
 |  | ||||||
|     explicit OperationNode(OperationCode code_, std::vector<Node> operands_) |  | ||||||
|         : OperationNode(code_, Meta{}, std::move(operands_)) {} |  | ||||||
| 
 |  | ||||||
|     explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) |  | ||||||
|         : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} |  | ||||||
| 
 |  | ||||||
|     template <typename... Args> |  | ||||||
|     explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) |  | ||||||
|         : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] OperationCode GetCode() const { |  | ||||||
|         return code; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Meta& GetMeta() const { |  | ||||||
|         return meta; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] std::size_t GetOperandsCount() const { |  | ||||||
|         return operands.size(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& operator[](std::size_t operand_index) const { |  | ||||||
|         return operands.at(operand_index); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     OperationCode code{}; |  | ||||||
|     Meta meta{}; |  | ||||||
|     std::vector<Node> operands; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Encloses inside any kind of node that returns a boolean conditionally-executed code
 |  | ||||||
| class ConditionalNode final : public AmendNode { |  | ||||||
| public: |  | ||||||
|     explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) |  | ||||||
|         : condition{std::move(condition_)}, code{std::move(code_)} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetCondition() const { |  | ||||||
|         return condition; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const std::vector<Node>& GetCode() const { |  | ||||||
|         return code; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Node condition;         ///< Condition to be satisfied
 |  | ||||||
|     std::vector<Node> code; ///< Code to execute
 |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// A general purpose register
 |  | ||||||
| class GprNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr u32 GetIndex() const { |  | ||||||
|         return static_cast<u32>(index); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Tegra::Shader::Register index{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// A custom variable
 |  | ||||||
| class CustomVarNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr CustomVarNode(u32 index_) : index{index_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr u32 GetIndex() const { |  | ||||||
|         return index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 index{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// A 32-bits value that represents an immediate value
 |  | ||||||
| class ImmediateNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr ImmediateNode(u32 value_) : value{value_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr u32 GetValue() const { |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 value{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// One of Maxwell's internal flags
 |  | ||||||
| class InternalFlagNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr InternalFlag GetFlag() const { |  | ||||||
|         return flag; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     InternalFlag flag{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// A predicate register, it can be negated without additional nodes
 |  | ||||||
| class PredicateNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) |  | ||||||
|         : index{index_}, negated{negated_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { |  | ||||||
|         return index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr bool IsNegated() const { |  | ||||||
|         return negated; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Tegra::Shader::Pred index{}; |  | ||||||
|     bool negated{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Attribute buffer memory (known as attributes or varyings in GLSL terms)
 |  | ||||||
| class AbufNode final { |  | ||||||
| public: |  | ||||||
|     // Initialize for standard attributes (index is explicit).
 |  | ||||||
|     explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) |  | ||||||
|         : buffer{std::move(buffer_)}, index{index_}, element{element_} {} |  | ||||||
| 
 |  | ||||||
|     // Initialize for physical attributes (index is a variable value).
 |  | ||||||
|     explicit AbufNode(Node physical_address_, Node buffer_ = {}) |  | ||||||
|         : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { |  | ||||||
|         return index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] u32 GetElement() const { |  | ||||||
|         return element; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetBuffer() const { |  | ||||||
|         return buffer; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] bool IsPhysicalBuffer() const { |  | ||||||
|         return static_cast<bool>(physical_address); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetPhysicalAddress() const { |  | ||||||
|         return physical_address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Node physical_address; |  | ||||||
|     Node buffer; |  | ||||||
|     Tegra::Shader::Attribute::Index index{}; |  | ||||||
|     u32 element{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Patch memory (used to communicate tessellation stages).
 |  | ||||||
| class PatchNode final { |  | ||||||
| public: |  | ||||||
|     explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] constexpr u32 GetOffset() const { |  | ||||||
|         return offset; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 offset{}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Constant buffer node, usually mapped to uniform buffers in GLSL
 |  | ||||||
| class CbufNode final { |  | ||||||
| public: |  | ||||||
|     explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] u32 GetIndex() const { |  | ||||||
|         return index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetOffset() const { |  | ||||||
|         return offset; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 index{}; |  | ||||||
|     Node offset; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Local memory node
 |  | ||||||
| class LmemNode final { |  | ||||||
| public: |  | ||||||
|     explicit LmemNode(Node address_) : address{std::move(address_)} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetAddress() const { |  | ||||||
|         return address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Node address; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Shared memory node
 |  | ||||||
| class SmemNode final { |  | ||||||
| public: |  | ||||||
|     explicit SmemNode(Node address_) : address{std::move(address_)} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetAddress() const { |  | ||||||
|         return address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Node address; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /// Global memory node
 |  | ||||||
| class GmemNode final { |  | ||||||
| public: |  | ||||||
|     explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) |  | ||||||
|         : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, |  | ||||||
|           descriptor{descriptor_} {} |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetRealAddress() const { |  | ||||||
|         return real_address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const Node& GetBaseAddress() const { |  | ||||||
|         return base_address; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { |  | ||||||
|         return descriptor; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     Node real_address; |  | ||||||
|     Node base_address; |  | ||||||
|     GlobalMemoryBase descriptor; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
/// Commentary node; it only carries text and can be dropped.
class CommentNode final {
public:
    /// Takes ownership of the comment text.
    explicit CommentNode(std::string text_) : text(std::move(text_)) {}

    /// Returns the comment text.
    [[nodiscard]] const std::string& GetText() const { return text; }

private:
    std::string text;
};
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,115 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <cstring> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| Node Conditional(Node condition, std::vector<Node> code) { |  | ||||||
|     return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Comment(std::string text) { |  | ||||||
|     return MakeNode<CommentNode>(std::move(text)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Immediate(u32 value) { |  | ||||||
|     return MakeNode<ImmediateNode>(value); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Immediate(s32 value) { |  | ||||||
|     return Immediate(static_cast<u32>(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node Immediate(f32 value) { |  | ||||||
|     u32 integral; |  | ||||||
|     std::memcpy(&integral, &value, sizeof(u32)); |  | ||||||
|     return Immediate(integral); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { |  | ||||||
|     if (is_signed) { |  | ||||||
|         return operation_code; |  | ||||||
|     } |  | ||||||
|     switch (operation_code) { |  | ||||||
|     case OperationCode::FCastInteger: |  | ||||||
|         return OperationCode::FCastUInteger; |  | ||||||
|     case OperationCode::IAdd: |  | ||||||
|         return OperationCode::UAdd; |  | ||||||
|     case OperationCode::IMul: |  | ||||||
|         return OperationCode::UMul; |  | ||||||
|     case OperationCode::IDiv: |  | ||||||
|         return OperationCode::UDiv; |  | ||||||
|     case OperationCode::IMin: |  | ||||||
|         return OperationCode::UMin; |  | ||||||
|     case OperationCode::IMax: |  | ||||||
|         return OperationCode::UMax; |  | ||||||
|     case OperationCode::ICastFloat: |  | ||||||
|         return OperationCode::UCastFloat; |  | ||||||
|     case OperationCode::ICastUnsigned: |  | ||||||
|         return OperationCode::UCastSigned; |  | ||||||
|     case OperationCode::ILogicalShiftLeft: |  | ||||||
|         return OperationCode::ULogicalShiftLeft; |  | ||||||
|     case OperationCode::ILogicalShiftRight: |  | ||||||
|         return OperationCode::ULogicalShiftRight; |  | ||||||
|     case OperationCode::IArithmeticShiftRight: |  | ||||||
|         return OperationCode::UArithmeticShiftRight; |  | ||||||
|     case OperationCode::IBitwiseAnd: |  | ||||||
|         return OperationCode::UBitwiseAnd; |  | ||||||
|     case OperationCode::IBitwiseOr: |  | ||||||
|         return OperationCode::UBitwiseOr; |  | ||||||
|     case OperationCode::IBitwiseXor: |  | ||||||
|         return OperationCode::UBitwiseXor; |  | ||||||
|     case OperationCode::IBitwiseNot: |  | ||||||
|         return OperationCode::UBitwiseNot; |  | ||||||
|     case OperationCode::IBitfieldExtract: |  | ||||||
|         return OperationCode::UBitfieldExtract; |  | ||||||
|     case OperationCode::IBitfieldInsert: |  | ||||||
|         return OperationCode::UBitfieldInsert; |  | ||||||
|     case OperationCode::IBitCount: |  | ||||||
|         return OperationCode::UBitCount; |  | ||||||
|     case OperationCode::LogicalILessThan: |  | ||||||
|         return OperationCode::LogicalULessThan; |  | ||||||
|     case OperationCode::LogicalIEqual: |  | ||||||
|         return OperationCode::LogicalUEqual; |  | ||||||
|     case OperationCode::LogicalILessEqual: |  | ||||||
|         return OperationCode::LogicalULessEqual; |  | ||||||
|     case OperationCode::LogicalIGreaterThan: |  | ||||||
|         return OperationCode::LogicalUGreaterThan; |  | ||||||
|     case OperationCode::LogicalINotEqual: |  | ||||||
|         return OperationCode::LogicalUNotEqual; |  | ||||||
|     case OperationCode::LogicalIGreaterEqual: |  | ||||||
|         return OperationCode::LogicalUGreaterEqual; |  | ||||||
|     case OperationCode::AtomicIExchange: |  | ||||||
|         return OperationCode::AtomicUExchange; |  | ||||||
|     case OperationCode::AtomicIAdd: |  | ||||||
|         return OperationCode::AtomicUAdd; |  | ||||||
|     case OperationCode::AtomicIMin: |  | ||||||
|         return OperationCode::AtomicUMin; |  | ||||||
|     case OperationCode::AtomicIMax: |  | ||||||
|         return OperationCode::AtomicUMax; |  | ||||||
|     case OperationCode::AtomicIAnd: |  | ||||||
|         return OperationCode::AtomicUAnd; |  | ||||||
|     case OperationCode::AtomicIOr: |  | ||||||
|         return OperationCode::AtomicUOr; |  | ||||||
|     case OperationCode::AtomicIXor: |  | ||||||
|         return OperationCode::AtomicUXor; |  | ||||||
|     case OperationCode::INegate: |  | ||||||
|         UNREACHABLE_MSG("Can't negate an unsigned integer"); |  | ||||||
|         return {}; |  | ||||||
|     case OperationCode::IAbsolute: |  | ||||||
|         UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); |  | ||||||
|         return {}; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,71 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <memory> |  | ||||||
| #include <string> |  | ||||||
| #include <tuple> |  | ||||||
| #include <type_traits> |  | ||||||
| #include <utility> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/node.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| /// This arithmetic operation cannot be constraint
 |  | ||||||
| inline constexpr MetaArithmetic PRECISE = {true}; |  | ||||||
| /// This arithmetic operation can be optimized away
 |  | ||||||
| inline constexpr MetaArithmetic NO_PRECISE = {false}; |  | ||||||
| 
 |  | ||||||
| /// Creates a conditional node
 |  | ||||||
| Node Conditional(Node condition, std::vector<Node> code); |  | ||||||
| 
 |  | ||||||
| /// Creates a commentary node
 |  | ||||||
| Node Comment(std::string text); |  | ||||||
| 
 |  | ||||||
| /// Creates a u32 immediate
 |  | ||||||
| Node Immediate(u32 value); |  | ||||||
| 
 |  | ||||||
| /// Creates a s32 immediate
 |  | ||||||
| Node Immediate(s32 value); |  | ||||||
| 
 |  | ||||||
| /// Creates a f32 immediate
 |  | ||||||
| Node Immediate(f32 value); |  | ||||||
| 
 |  | ||||||
| /// Converts a signed operation code to an unsigned operation code
 |  | ||||||
| OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); |  | ||||||
| 
 |  | ||||||
| template <typename T, typename... Args> |  | ||||||
| Node MakeNode(Args&&... args) { |  | ||||||
|     static_assert(std::is_convertible_v<T, NodeData>); |  | ||||||
|     return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <typename T, typename... Args> |  | ||||||
| TrackSampler MakeTrackSampler(Args&&... args) { |  | ||||||
|     static_assert(std::is_convertible_v<T, TrackSamplerData>); |  | ||||||
|     return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <typename... Args> |  | ||||||
| Node Operation(OperationCode code, Args&&... args) { |  | ||||||
|     if constexpr (sizeof...(args) == 0) { |  | ||||||
|         return MakeNode<OperationNode>(code); |  | ||||||
|     } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, |  | ||||||
|                                                Meta>) { |  | ||||||
|         return MakeNode<OperationNode>(code, std::forward<Args>(args)...); |  | ||||||
|     } else { |  | ||||||
|         return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <typename... Args> |  | ||||||
| Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { |  | ||||||
|     return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,181 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <tuple> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/kepler_compute.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Engines::ConstBufferEngineInterface; |  | ||||||
| using Tegra::Engines::SamplerDescriptor; |  | ||||||
| using Tegra::Engines::ShaderType; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { |  | ||||||
|     if (shader_stage == ShaderType::Compute) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); |  | ||||||
| 
 |  | ||||||
|     return { |  | ||||||
|         .tfb_layouts = graphics.regs.tfb_layouts, |  | ||||||
|         .tfb_varying_locs = graphics.regs.tfb_varying_locs, |  | ||||||
|         .primitive_topology = graphics.regs.draw.topology, |  | ||||||
|         .tessellation_primitive = graphics.regs.tess_mode.prim, |  | ||||||
|         .tessellation_spacing = graphics.regs.tess_mode.spacing, |  | ||||||
|         .tfb_enabled = graphics.regs.tfb_enabled != 0, |  | ||||||
|         .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { |  | ||||||
|     if (shader_stage != ShaderType::Compute) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); |  | ||||||
|     const auto& launch = compute.launch_description; |  | ||||||
| 
 |  | ||||||
|     return { |  | ||||||
|         .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, |  | ||||||
|         .shared_memory_size_in_words = launch.shared_alloc, |  | ||||||
|         .local_memory_size_in_words = launch.local_pos_alloc, |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) |  | ||||||
|     : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, |  | ||||||
|       bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} |  | ||||||
| 
 |  | ||||||
| Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) |  | ||||||
|     : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, |  | ||||||
|       graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( |  | ||||||
|                                                                   shader_stage, engine_)} {} |  | ||||||
| 
 |  | ||||||
| Registry::~Registry() = default; |  | ||||||
| 
 |  | ||||||
| std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { |  | ||||||
|     const std::pair<u32, u32> key = {buffer, offset}; |  | ||||||
|     const auto iter = keys.find(key); |  | ||||||
|     if (iter != keys.end()) { |  | ||||||
|         return iter->second; |  | ||||||
|     } |  | ||||||
|     if (!engine) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); |  | ||||||
|     keys.emplace(key, value); |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { |  | ||||||
|     const u32 key = offset; |  | ||||||
|     const auto iter = bound_samplers.find(key); |  | ||||||
|     if (iter != bound_samplers.end()) { |  | ||||||
|         return iter->second; |  | ||||||
|     } |  | ||||||
|     if (!engine) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); |  | ||||||
|     bound_samplers.emplace(key, value); |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( |  | ||||||
|     std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { |  | ||||||
|     SeparateSamplerKey key; |  | ||||||
|     key.buffers = buffers; |  | ||||||
|     key.offsets = offsets; |  | ||||||
|     const auto iter = separate_samplers.find(key); |  | ||||||
|     if (iter != separate_samplers.end()) { |  | ||||||
|         return iter->second; |  | ||||||
|     } |  | ||||||
|     if (!engine) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); |  | ||||||
|     const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); |  | ||||||
|     const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); |  | ||||||
|     separate_samplers.emplace(key, value); |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { |  | ||||||
|     const std::pair key = {buffer, offset}; |  | ||||||
|     const auto iter = bindless_samplers.find(key); |  | ||||||
|     if (iter != bindless_samplers.end()) { |  | ||||||
|         return iter->second; |  | ||||||
|     } |  | ||||||
|     if (!engine) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); |  | ||||||
|     bindless_samplers.emplace(key, value); |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { |  | ||||||
|     keys.insert_or_assign({buffer, offset}, value); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { |  | ||||||
|     bound_samplers.insert_or_assign(offset, sampler); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { |  | ||||||
|     bindless_samplers.insert_or_assign({buffer, offset}, sampler); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool Registry::IsConsistent() const { |  | ||||||
|     if (!engine) { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     return std::all_of(keys.begin(), keys.end(), |  | ||||||
|                        [this](const auto& pair) { |  | ||||||
|                            const auto [cbuf, offset] = pair.first; |  | ||||||
|                            const auto value = pair.second; |  | ||||||
|                            return value == engine->AccessConstBuffer32(stage, cbuf, offset); |  | ||||||
|                        }) && |  | ||||||
|            std::all_of(bound_samplers.begin(), bound_samplers.end(), |  | ||||||
|                        [this](const auto& sampler) { |  | ||||||
|                            const auto [key, value] = sampler; |  | ||||||
|                            return value == engine->AccessBoundSampler(stage, key); |  | ||||||
|                        }) && |  | ||||||
|            std::all_of(bindless_samplers.begin(), bindless_samplers.end(), |  | ||||||
|                        [this](const auto& sampler) { |  | ||||||
|                            const auto [cbuf, offset] = sampler.first; |  | ||||||
|                            const auto value = sampler.second; |  | ||||||
|                            return value == engine->AccessBindlessSampler(stage, cbuf, offset); |  | ||||||
|                        }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool Registry::HasEqualKeys(const Registry& rhs) const { |  | ||||||
|     return std::tie(keys, bound_samplers, bindless_samplers) == |  | ||||||
|            std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const GraphicsInfo& Registry::GetGraphicsInfo() const { |  | ||||||
|     ASSERT(stage != Tegra::Engines::ShaderType::Compute); |  | ||||||
|     return graphics_info; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| const ComputeInfo& Registry::GetComputeInfo() const { |  | ||||||
|     ASSERT(stage == Tegra::Engines::ShaderType::Compute); |  | ||||||
|     return compute_info; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,172 +0,0 @@ | ||||||
| // Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <optional> |  | ||||||
| #include <type_traits> |  | ||||||
| #include <unordered_map> |  | ||||||
| #include <utility> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/hash.h" |  | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/engines/shader_type.h" |  | ||||||
| #include "video_core/guest_driver.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| struct SeparateSamplerKey { |  | ||||||
|     std::pair<u32, u32> buffers; |  | ||||||
|     std::pair<u32, u32> offsets; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
| 
 |  | ||||||
| namespace std { |  | ||||||
| 
 |  | ||||||
| template <> |  | ||||||
| struct hash<VideoCommon::Shader::SeparateSamplerKey> { |  | ||||||
|     std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { |  | ||||||
|         return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ |  | ||||||
|                                 key.offsets.second); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <> |  | ||||||
| struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { |  | ||||||
|     bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, |  | ||||||
|                     const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { |  | ||||||
|         return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace std
 |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; |  | ||||||
| using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; |  | ||||||
| using SeparateSamplerMap = |  | ||||||
|     std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; |  | ||||||
| using BindlessSamplerMap = |  | ||||||
|     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; |  | ||||||
| 
 |  | ||||||
| struct GraphicsInfo { |  | ||||||
|     using Maxwell = Tegra::Engines::Maxwell3D::Regs; |  | ||||||
| 
 |  | ||||||
|     std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> |  | ||||||
|         tfb_layouts{}; |  | ||||||
|     std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; |  | ||||||
|     Maxwell::PrimitiveTopology primitive_topology{}; |  | ||||||
|     Maxwell::TessellationPrimitive tessellation_primitive{}; |  | ||||||
|     Maxwell::TessellationSpacing tessellation_spacing{}; |  | ||||||
|     bool tfb_enabled = false; |  | ||||||
|     bool tessellation_clockwise = false; |  | ||||||
| }; |  | ||||||
| static_assert(std::is_trivially_copyable_v<GraphicsInfo> && |  | ||||||
|               std::is_standard_layout_v<GraphicsInfo>); |  | ||||||
| 
 |  | ||||||
| struct ComputeInfo { |  | ||||||
|     std::array<u32, 3> workgroup_size{}; |  | ||||||
|     u32 shared_memory_size_in_words = 0; |  | ||||||
|     u32 local_memory_size_in_words = 0; |  | ||||||
| }; |  | ||||||
| static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); |  | ||||||
| 
 |  | ||||||
| struct SerializedRegistryInfo { |  | ||||||
|     VideoCore::GuestDriverProfile guest_driver_profile; |  | ||||||
|     u32 bound_buffer = 0; |  | ||||||
|     GraphicsInfo graphics; |  | ||||||
|     ComputeInfo compute; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| /**
 |  | ||||||
|  * The Registry is a class use to interface the 3D and compute engines with the shader compiler. |  | ||||||
|  * With it, the shader can obtain required data from GPU state and store it for disk shader |  | ||||||
|  * compilation. |  | ||||||
|  */ |  | ||||||
| class Registry { |  | ||||||
| public: |  | ||||||
|     explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); |  | ||||||
| 
 |  | ||||||
|     explicit Registry(Tegra::Engines::ShaderType shader_stage, |  | ||||||
|                       Tegra::Engines::ConstBufferEngineInterface& engine_); |  | ||||||
| 
 |  | ||||||
|     ~Registry(); |  | ||||||
| 
 |  | ||||||
|     /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
 |  | ||||||
|     /// not it will obtain it from maxwell3d and register it.
 |  | ||||||
|     std::optional<u32> ObtainKey(u32 buffer, u32 offset); |  | ||||||
| 
 |  | ||||||
|     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); |  | ||||||
| 
 |  | ||||||
|     std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( |  | ||||||
|         std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); |  | ||||||
| 
 |  | ||||||
|     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |  | ||||||
| 
 |  | ||||||
|     /// Inserts a key.
 |  | ||||||
|     void InsertKey(u32 buffer, u32 offset, u32 value); |  | ||||||
| 
 |  | ||||||
|     /// Inserts a bound sampler key.
 |  | ||||||
|     void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); |  | ||||||
| 
 |  | ||||||
|     /// Inserts a bindless sampler key.
 |  | ||||||
|     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |  | ||||||
| 
 |  | ||||||
|     /// Checks keys and samplers against engine's current const buffers.
 |  | ||||||
|     /// Returns true if they are the same value, false otherwise.
 |  | ||||||
|     bool IsConsistent() const; |  | ||||||
| 
 |  | ||||||
|     /// Returns true if the keys are equal to the other ones in the registry.
 |  | ||||||
|     bool HasEqualKeys(const Registry& rhs) const; |  | ||||||
| 
 |  | ||||||
|     /// Returns graphics information from this shader
 |  | ||||||
|     const GraphicsInfo& GetGraphicsInfo() const; |  | ||||||
| 
 |  | ||||||
|     /// Returns compute information from this shader
 |  | ||||||
|     const ComputeInfo& GetComputeInfo() const; |  | ||||||
| 
 |  | ||||||
|     /// Gives an getter to the const buffer keys in the database.
 |  | ||||||
|     const KeyMap& GetKeys() const { |  | ||||||
|         return keys; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Gets samplers database.
 |  | ||||||
|     const BoundSamplerMap& GetBoundSamplers() const { |  | ||||||
|         return bound_samplers; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Gets bindless samplers database.
 |  | ||||||
|     const BindlessSamplerMap& GetBindlessSamplers() const { |  | ||||||
|         return bindless_samplers; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Gets bound buffer used on this shader
 |  | ||||||
|     u32 GetBoundBuffer() const { |  | ||||||
|         return bound_buffer; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Obtains access to the guest driver's profile.
 |  | ||||||
|     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { |  | ||||||
|         return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     const Tegra::Engines::ShaderType stage; |  | ||||||
|     VideoCore::GuestDriverProfile stored_guest_driver_profile; |  | ||||||
|     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |  | ||||||
|     KeyMap keys; |  | ||||||
|     BoundSamplerMap bound_samplers; |  | ||||||
|     SeparateSamplerMap separate_samplers; |  | ||||||
|     BindlessSamplerMap bindless_samplers; |  | ||||||
|     u32 bound_buffer; |  | ||||||
|     GraphicsInfo graphics_info; |  | ||||||
|     ComputeInfo compute_info; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,464 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <array> |  | ||||||
| #include <cmath> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/shader/node.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| using Tegra::Shader::Attribute; |  | ||||||
| using Tegra::Shader::Instruction; |  | ||||||
| using Tegra::Shader::IpaMode; |  | ||||||
| using Tegra::Shader::Pred; |  | ||||||
| using Tegra::Shader::PredCondition; |  | ||||||
| using Tegra::Shader::PredOperation; |  | ||||||
| using Tegra::Shader::Register; |  | ||||||
| 
 |  | ||||||
| ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, |  | ||||||
|                    Registry& registry_) |  | ||||||
|     : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ |  | ||||||
|                                                                                        registry_} { |  | ||||||
|     Decode(); |  | ||||||
|     PostDecode(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| ShaderIR::~ShaderIR() = default; |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetRegister(Register reg) { |  | ||||||
|     if (reg != Register::ZeroIndex) { |  | ||||||
|         used_registers.insert(static_cast<u32>(reg)); |  | ||||||
|     } |  | ||||||
|     return MakeNode<GprNode>(reg); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetCustomVariable(u32 id) { |  | ||||||
|     return MakeNode<CustomVarNode>(id); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetImmediate19(Instruction instr) { |  | ||||||
|     return Immediate(instr.alu.GetImm20_19()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetImmediate32(Instruction instr) { |  | ||||||
|     return Immediate(instr.alu.GetImm20_32()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { |  | ||||||
|     const auto index = static_cast<u32>(index_); |  | ||||||
|     const auto offset = static_cast<u32>(offset_); |  | ||||||
| 
 |  | ||||||
|     used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); |  | ||||||
| 
 |  | ||||||
|     return MakeNode<CbufNode>(index, Immediate(offset)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { |  | ||||||
|     const auto index = static_cast<u32>(index_); |  | ||||||
|     const auto offset = static_cast<u32>(offset_); |  | ||||||
| 
 |  | ||||||
|     used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); |  | ||||||
| 
 |  | ||||||
|     Node final_offset = [&] { |  | ||||||
|         // Attempt to inline constant buffer without a variable offset. This is done to allow
 |  | ||||||
|         // tracking LDC calls.
 |  | ||||||
|         if (const auto gpr = std::get_if<GprNode>(&*node)) { |  | ||||||
|             if (gpr->GetIndex() == Register::ZeroIndex) { |  | ||||||
|                 return Immediate(offset); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); |  | ||||||
|     }(); |  | ||||||
|     return MakeNode<CbufNode>(index, std::move(final_offset)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPredicate(u64 pred_, bool negated) { |  | ||||||
|     const auto pred = static_cast<Pred>(pred_); |  | ||||||
|     if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { |  | ||||||
|         used_predicates.insert(pred); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return MakeNode<PredicateNode>(pred, negated); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPredicate(bool immediate) { |  | ||||||
|     return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { |  | ||||||
|     MarkAttributeUsage(index, element); |  | ||||||
|     used_input_attributes.emplace(index); |  | ||||||
|     return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { |  | ||||||
|     uses_physical_attributes = true; |  | ||||||
|     return MakeNode<AbufNode>(GetRegister(physical_address), buffer); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |  | ||||||
|     MarkAttributeUsage(index, element); |  | ||||||
|     used_output_attributes.insert(index); |  | ||||||
|     return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { |  | ||||||
|     Node node = MakeNode<InternalFlagNode>(flag); |  | ||||||
|     if (negated) { |  | ||||||
|         return Operation(OperationCode::LogicalNegate, std::move(node)); |  | ||||||
|     } |  | ||||||
|     return node; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetLocalMemory(Node address) { |  | ||||||
|     return MakeNode<LmemNode>(std::move(address)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetSharedMemory(Node address) { |  | ||||||
|     return MakeNode<SmemNode>(std::move(address)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetTemporary(u32 id) { |  | ||||||
|     return GetRegister(Register::ZeroIndex + 1 + id); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |  | ||||||
|     if (absolute) { |  | ||||||
|         value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); |  | ||||||
|     } |  | ||||||
|     if (negate) { |  | ||||||
|         value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); |  | ||||||
|     } |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { |  | ||||||
|     if (!saturate) { |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node positive_zero = Immediate(std::copysignf(0, 1)); |  | ||||||
|     Node positive_one = Immediate(1.0f); |  | ||||||
|     return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), |  | ||||||
|                      std::move(positive_one)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { |  | ||||||
|     switch (size) { |  | ||||||
|     case Register::Size::Byte: |  | ||||||
|         value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |  | ||||||
|                                 std::move(value), Immediate(24)); |  | ||||||
|         value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |  | ||||||
|                                 std::move(value), Immediate(24)); |  | ||||||
|         return value; |  | ||||||
|     case Register::Size::Short: |  | ||||||
|         value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, |  | ||||||
|                                 std::move(value), Immediate(16)); |  | ||||||
|         value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, |  | ||||||
|                                 std::move(value), Immediate(16)); |  | ||||||
|         return value; |  | ||||||
|     case Register::Size::Word: |  | ||||||
|         // Default - do nothing
 |  | ||||||
|         return value; |  | ||||||
|     default: |  | ||||||
|         UNREACHABLE_MSG("Unimplemented conversion size: {}", size); |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { |  | ||||||
|     if (!is_signed) { |  | ||||||
|         // Absolute or negate on an unsigned is pointless
 |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
|     if (absolute) { |  | ||||||
|         value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); |  | ||||||
|     } |  | ||||||
|     if (negate) { |  | ||||||
|         value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); |  | ||||||
|     } |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { |  | ||||||
|     Node value = Immediate(instr.half_imm.PackImmediates()); |  | ||||||
|     if (!has_negation) { |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |  | ||||||
|     Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); |  | ||||||
| 
 |  | ||||||
|     return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), |  | ||||||
|                      std::move(second_negate)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { |  | ||||||
|     return Operation(OperationCode::HUnpack, type, std::move(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |  | ||||||
|     switch (merge) { |  | ||||||
|     case Tegra::Shader::HalfMerge::H0_H1: |  | ||||||
|         return src; |  | ||||||
|     case Tegra::Shader::HalfMerge::F32: |  | ||||||
|         return Operation(OperationCode::HMergeF32, std::move(src)); |  | ||||||
|     case Tegra::Shader::HalfMerge::Mrg_H0: |  | ||||||
|         return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); |  | ||||||
|     case Tegra::Shader::HalfMerge::Mrg_H1: |  | ||||||
|         return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); |  | ||||||
|     } |  | ||||||
|     UNREACHABLE(); |  | ||||||
|     return src; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |  | ||||||
|     if (absolute) { |  | ||||||
|         value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); |  | ||||||
|     } |  | ||||||
|     if (negate) { |  | ||||||
|         value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), |  | ||||||
|                           GetPredicate(true)); |  | ||||||
|     } |  | ||||||
|     return value; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { |  | ||||||
|     if (!saturate) { |  | ||||||
|         return value; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     Node positive_zero = Immediate(std::copysignf(0, 1)); |  | ||||||
|     Node positive_one = Immediate(1.0f); |  | ||||||
|     return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), |  | ||||||
|                      std::move(positive_one)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |  | ||||||
|     if (condition == PredCondition::T) { |  | ||||||
|         return GetPredicate(true); |  | ||||||
|     } else if (condition == PredCondition::F) { |  | ||||||
|         return GetPredicate(false); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     static constexpr std::array comparison_table{ |  | ||||||
|         OperationCode(0), |  | ||||||
|         OperationCode::LogicalFOrdLessThan,       // LT
 |  | ||||||
|         OperationCode::LogicalFOrdEqual,          // EQ
 |  | ||||||
|         OperationCode::LogicalFOrdLessEqual,      // LE
 |  | ||||||
|         OperationCode::LogicalFOrdGreaterThan,    // GT
 |  | ||||||
|         OperationCode::LogicalFOrdNotEqual,       // NE
 |  | ||||||
|         OperationCode::LogicalFOrdGreaterEqual,   // GE
 |  | ||||||
|         OperationCode::LogicalFOrdered,           // NUM
 |  | ||||||
|         OperationCode::LogicalFUnordered,         // NAN
 |  | ||||||
|         OperationCode::LogicalFUnordLessThan,     // LTU
 |  | ||||||
|         OperationCode::LogicalFUnordEqual,        // EQU
 |  | ||||||
|         OperationCode::LogicalFUnordLessEqual,    // LEU
 |  | ||||||
|         OperationCode::LogicalFUnordGreaterThan,  // GTU
 |  | ||||||
|         OperationCode::LogicalFUnordNotEqual,     // NEU
 |  | ||||||
|         OperationCode::LogicalFUnordGreaterEqual, // GEU
 |  | ||||||
|     }; |  | ||||||
|     const std::size_t index = static_cast<std::size_t>(condition); |  | ||||||
|     ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); |  | ||||||
| 
 |  | ||||||
|     return Operation(comparison_table[index], op_a, op_b); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, |  | ||||||
|                                              Node op_b) { |  | ||||||
|     static constexpr std::array comparison_table{ |  | ||||||
|         std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, |  | ||||||
|         std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, |  | ||||||
|         std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, |  | ||||||
|         std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, |  | ||||||
|         std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, |  | ||||||
|         std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const auto comparison = |  | ||||||
|         std::find_if(comparison_table.cbegin(), comparison_table.cend(), |  | ||||||
|                      [condition](const auto entry) { return condition == entry.first; }); |  | ||||||
|     UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), |  | ||||||
|                          "Unknown predicate comparison operation"); |  | ||||||
| 
 |  | ||||||
|     return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |  | ||||||
|                            std::move(op_b)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, |  | ||||||
|                                           Node op_b) { |  | ||||||
|     static constexpr std::array comparison_table{ |  | ||||||
|         std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, |  | ||||||
|         std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, |  | ||||||
|         std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, |  | ||||||
|         std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, |  | ||||||
|         std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, |  | ||||||
|         std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, |  | ||||||
|         std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, |  | ||||||
|         std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, |  | ||||||
|         std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, |  | ||||||
|         std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, |  | ||||||
|         std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const auto comparison = |  | ||||||
|         std::find_if(comparison_table.cbegin(), comparison_table.cend(), |  | ||||||
|                      [condition](const auto entry) { return condition == entry.first; }); |  | ||||||
|     UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), |  | ||||||
|                          "Unknown predicate comparison operation"); |  | ||||||
| 
 |  | ||||||
|     return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |  | ||||||
|     static constexpr std::array operation_table{ |  | ||||||
|         OperationCode::LogicalAnd, |  | ||||||
|         OperationCode::LogicalOr, |  | ||||||
|         OperationCode::LogicalXor, |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     const auto index = static_cast<std::size_t>(operation); |  | ||||||
|     if (index >= operation_table.size()) { |  | ||||||
|         UNIMPLEMENTED_MSG("Unknown predicate operation."); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     return operation_table[index]; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::GetConditionCode(ConditionCode cc) const { |  | ||||||
|     switch (cc) { |  | ||||||
|     case ConditionCode::NEU: |  | ||||||
|         return GetInternalFlag(InternalFlag::Zero, true); |  | ||||||
|     case ConditionCode::FCSM_TR: |  | ||||||
|         UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); |  | ||||||
|         return MakeNode<PredicateNode>(Pred::NeverExecute, false); |  | ||||||
|     default: |  | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); |  | ||||||
|         return MakeNode<PredicateNode>(Pred::NeverExecute, false); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { |  | ||||||
|     bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { |  | ||||||
|     bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { |  | ||||||
|     bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { |  | ||||||
|     bb.push_back( |  | ||||||
|         Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { |  | ||||||
|     bb.push_back( |  | ||||||
|         Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |  | ||||||
|     SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { |  | ||||||
|     if (!sets_cc) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); |  | ||||||
|     SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |  | ||||||
|     LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { |  | ||||||
|     if (!sets_cc) { |  | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|     Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); |  | ||||||
|     SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); |  | ||||||
|     LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |  | ||||||
|     return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), |  | ||||||
|                      Immediate(offset), Immediate(bits)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { |  | ||||||
|     return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), |  | ||||||
|                      Immediate(bits)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { |  | ||||||
|     switch (index) { |  | ||||||
|     case Attribute::Index::LayerViewportPointSize: |  | ||||||
|         switch (element) { |  | ||||||
|         case 0: |  | ||||||
|             UNIMPLEMENTED(); |  | ||||||
|             break; |  | ||||||
|         case 1: |  | ||||||
|             uses_layer = true; |  | ||||||
|             break; |  | ||||||
|         case 2: |  | ||||||
|             uses_viewport_index = true; |  | ||||||
|             break; |  | ||||||
|         case 3: |  | ||||||
|             uses_point_size = true; |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case Attribute::Index::TessCoordInstanceIDVertexID: |  | ||||||
|         switch (element) { |  | ||||||
|         case 2: |  | ||||||
|             uses_instance_id = true; |  | ||||||
|             break; |  | ||||||
|         case 3: |  | ||||||
|             uses_vertex_id = true; |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     case Attribute::Index::ClipDistances0123: |  | ||||||
|     case Attribute::Index::ClipDistances4567: { |  | ||||||
|         const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; |  | ||||||
|         used_clip_distances.at(clip_index) = true; |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
|     case Attribute::Index::FrontColor: |  | ||||||
|     case Attribute::Index::FrontSecondaryColor: |  | ||||||
|     case Attribute::Index::BackColor: |  | ||||||
|     case Attribute::Index::BackSecondaryColor: |  | ||||||
|         uses_legacy_varyings = true; |  | ||||||
|         break; |  | ||||||
|     default: |  | ||||||
|         if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { |  | ||||||
|             uses_legacy_varyings = true; |  | ||||||
|         } |  | ||||||
|         break; |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::size_t ShaderIR::DeclareAmend(Node new_amend) { |  | ||||||
|     const auto id = amend_code.size(); |  | ||||||
|     amend_code.push_back(std::move(new_amend)); |  | ||||||
|     return id; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 ShaderIR::NewCustomVariable() { |  | ||||||
|     return num_custom_variables++; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,479 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <list> |  | ||||||
| #include <map> |  | ||||||
| #include <optional> |  | ||||||
| #include <set> |  | ||||||
| #include <tuple> |  | ||||||
| #include <vector> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/engines/shader_bytecode.h" |  | ||||||
| #include "video_core/engines/shader_header.h" |  | ||||||
| #include "video_core/shader/ast.h" |  | ||||||
| #include "video_core/shader/compiler_settings.h" |  | ||||||
| #include "video_core/shader/memory_util.h" |  | ||||||
| #include "video_core/shader/node.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| struct ShaderBlock; |  | ||||||
| 
 |  | ||||||
| constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |  | ||||||
| 
 |  | ||||||
| struct ConstBuffer { |  | ||||||
|     constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) |  | ||||||
|         : max_offset{max_offset_}, is_indirect{is_indirect_} {} |  | ||||||
| 
 |  | ||||||
|     constexpr ConstBuffer() = default; |  | ||||||
| 
 |  | ||||||
|     void MarkAsUsed(u64 offset) { |  | ||||||
|         max_offset = std::max(max_offset, static_cast<u32>(offset)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void MarkAsUsedIndirect() { |  | ||||||
|         is_indirect = true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsIndirect() const { |  | ||||||
|         return is_indirect; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetSize() const { |  | ||||||
|         return max_offset + static_cast<u32>(sizeof(float)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetMaxOffset() const { |  | ||||||
|         return max_offset; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     u32 max_offset = 0; |  | ||||||
|     bool is_indirect = false; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
/// Read/write usage flags of a global memory region; both default to false.
struct GlobalMemoryUsage {
    bool is_read = false;
    bool is_written = false;
};
| 
 |  | ||||||
| class ShaderIR final { |  | ||||||
| public: |  | ||||||
|     explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, |  | ||||||
|                       CompilerSettings settings_, Registry& registry_); |  | ||||||
|     ~ShaderIR(); |  | ||||||
| 
 |  | ||||||
|     const std::map<u32, NodeBlock>& GetBasicBlocks() const { |  | ||||||
|         return basic_blocks; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::set<u32>& GetRegisters() const { |  | ||||||
|         return used_registers; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::set<Tegra::Shader::Pred>& GetPredicates() const { |  | ||||||
|         return used_predicates; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { |  | ||||||
|         return used_input_attributes; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { |  | ||||||
|         return used_output_attributes; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::map<u32, ConstBuffer>& GetConstantBuffers() const { |  | ||||||
|         return used_cbufs; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::list<SamplerEntry>& GetSamplers() const { |  | ||||||
|         return used_samplers; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::list<ImageEntry>& GetImages() const { |  | ||||||
|         return used_images; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() |  | ||||||
|         const { |  | ||||||
|         return used_clip_distances; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { |  | ||||||
|         return used_global_memory; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::size_t GetLength() const { |  | ||||||
|         return static_cast<std::size_t>(coverage_end * sizeof(u64)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesLayer() const { |  | ||||||
|         return uses_layer; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesViewportIndex() const { |  | ||||||
|         return uses_viewport_index; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesPointSize() const { |  | ||||||
|         return uses_point_size; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesInstanceId() const { |  | ||||||
|         return uses_instance_id; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesVertexId() const { |  | ||||||
|         return uses_vertex_id; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesLegacyVaryings() const { |  | ||||||
|         return uses_legacy_varyings; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesYNegate() const { |  | ||||||
|         return uses_y_negate; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool UsesWarps() const { |  | ||||||
|         return uses_warps; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool HasPhysicalAttributes() const { |  | ||||||
|         return uses_physical_attributes; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const Tegra::Shader::Header& GetHeader() const { |  | ||||||
|         return header; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsFlowStackDisabled() const { |  | ||||||
|         return disable_flow_stack; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool IsDecompiled() const { |  | ||||||
|         return decompiled; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const ASTManager& GetASTManager() const { |  | ||||||
|         return program_manager; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ASTNode GetASTProgram() const { |  | ||||||
|         return program_manager.GetProgram(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetASTNumVariables() const { |  | ||||||
|         return program_manager.GetVariables(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 ConvertAddressToNvidiaSpace(u32 address) const { |  | ||||||
|         return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Returns a condition code evaluated from internal flags
 |  | ||||||
|     Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; |  | ||||||
| 
 |  | ||||||
|     const Node& GetAmendNode(std::size_t index) const { |  | ||||||
|         return amend_code[index]; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     u32 GetNumCustomVariables() const { |  | ||||||
|         return num_custom_variables; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     friend class ASTDecoder; |  | ||||||
| 
 |  | ||||||
|     struct SamplerInfo { |  | ||||||
|         std::optional<Tegra::Shader::TextureType> type; |  | ||||||
|         std::optional<bool> is_array; |  | ||||||
|         std::optional<bool> is_shadow; |  | ||||||
|         std::optional<bool> is_buffer; |  | ||||||
| 
 |  | ||||||
|         constexpr bool IsComplete() const noexcept { |  | ||||||
|             return type && is_array && is_shadow && is_buffer; |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     void Decode(); |  | ||||||
|     void PostDecode(); |  | ||||||
| 
 |  | ||||||
|     NodeBlock DecodeRange(u32 begin, u32 end); |  | ||||||
|     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); |  | ||||||
|     void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); |  | ||||||
| 
 |  | ||||||
|     /**
 |  | ||||||
|      * Decodes a single instruction from Tegra to IR. |  | ||||||
|      * @param bb Basic block where the nodes will be written to. |  | ||||||
|      * @param pc Program counter. Offset to decode. |  | ||||||
|      * @return Next address to decode. |  | ||||||
|      */ |  | ||||||
|     u32 DecodeInstr(NodeBlock& bb, u32 pc); |  | ||||||
| 
 |  | ||||||
|     u32 DecodeArithmetic(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeBfe(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeBfi(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeShift(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeFfma(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeHfma2(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeConversion(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeWarp(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeMemory(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeTexture(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeImage(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeFloatSet(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeHalfSet(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeVideo(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeXmad(NodeBlock& bb, u32 pc); |  | ||||||
|     u32 DecodeOther(NodeBlock& bb, u32 pc); |  | ||||||
| 
 |  | ||||||
|     /// Generates a node for a passed register.
 |  | ||||||
|     Node GetRegister(Tegra::Shader::Register reg); |  | ||||||
|     /// Generates a node for a custom variable
 |  | ||||||
|     Node GetCustomVariable(u32 id); |  | ||||||
|     /// Generates a node representing a 19-bit immediate value
 |  | ||||||
|     Node GetImmediate19(Tegra::Shader::Instruction instr); |  | ||||||
|     /// Generates a node representing a 32-bit immediate value
 |  | ||||||
|     Node GetImmediate32(Tegra::Shader::Instruction instr); |  | ||||||
|     /// Generates a node representing a constant buffer
 |  | ||||||
|     Node GetConstBuffer(u64 index, u64 offset); |  | ||||||
|     /// Generates a node representing a constant buffer with a variadic offset
 |  | ||||||
|     Node GetConstBufferIndirect(u64 index, u64 offset, Node node); |  | ||||||
|     /// Generates a node for a passed predicate. It can be optionally negated
 |  | ||||||
|     Node GetPredicate(u64 pred, bool negated = false); |  | ||||||
|     /// Generates a predicate node for an immediate true or false value
 |  | ||||||
|     Node GetPredicate(bool immediate); |  | ||||||
|     /// Generates a node representing an input attribute. Keeps track of used attributes.
 |  | ||||||
|     Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); |  | ||||||
|     /// Generates a node representing a physical input attribute.
 |  | ||||||
|     Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); |  | ||||||
|     /// Generates a node representing an output attribute. Keeps track of used attributes.
 |  | ||||||
|     Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); |  | ||||||
|     /// Generates a node representing an internal flag
 |  | ||||||
|     Node GetInternalFlag(InternalFlag flag, bool negated = false) const; |  | ||||||
|     /// Generates a node representing a local memory address
 |  | ||||||
|     Node GetLocalMemory(Node address); |  | ||||||
|     /// Generates a node representing a shared memory address
 |  | ||||||
|     Node GetSharedMemory(Node address); |  | ||||||
|     /// Generates a temporary, internally it uses a post-RZ register
 |  | ||||||
|     Node GetTemporary(u32 id); |  | ||||||
| 
 |  | ||||||
|     /// Sets a register. src value must be a number-evaluated node.
 |  | ||||||
|     void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); |  | ||||||
|     /// Sets a predicate. src value must be a bool-evaluated node
 |  | ||||||
|     void SetPredicate(NodeBlock& bb, u64 dest, Node src); |  | ||||||
|     /// Sets an internal flag. src value must be a bool-evaluated node
 |  | ||||||
|     void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |  | ||||||
|     /// Sets a local memory address with a value.
 |  | ||||||
|     void SetLocalMemory(NodeBlock& bb, Node address, Node value); |  | ||||||
|     /// Sets a shared memory address with a value.
 |  | ||||||
|     void SetSharedMemory(NodeBlock& bb, Node address, Node value); |  | ||||||
|     /// Sets a temporary. Internally it uses a post-RZ register
 |  | ||||||
|     void SetTemporary(NodeBlock& bb, u32 id, Node value); |  | ||||||
| 
 |  | ||||||
|     /// Sets internal flags from a float
 |  | ||||||
|     void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); |  | ||||||
|     /// Sets internal flags from an integer
 |  | ||||||
|     void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); |  | ||||||
| 
 |  | ||||||
|     /// Conditionally absolute/negated float. Absolute is applied first
 |  | ||||||
|     Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); |  | ||||||
|     /// Conditionally saturates a float
 |  | ||||||
|     Node GetSaturatedFloat(Node value, bool saturate = true); |  | ||||||
| 
 |  | ||||||
|     /// Converts an integer to different sizes.
 |  | ||||||
|     Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); |  | ||||||
|     /// Conditionally absolute/negated integer. Absolute is applied first
 |  | ||||||
|     Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); |  | ||||||
| 
 |  | ||||||
|     /// Unpacks a half immediate from an instruction
 |  | ||||||
|     Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); |  | ||||||
|     /// Unpacks a binary value into a half float pair with a type format
 |  | ||||||
|     Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); |  | ||||||
|     /// Merges a half pair into another value
 |  | ||||||
|     Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); |  | ||||||
|     /// Conditionally absolute/negated half float pair. Absolute is applied first
 |  | ||||||
|     Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); |  | ||||||
|     /// Conditionally saturates a half float pair
 |  | ||||||
|     Node GetSaturatedHalfFloat(Node value, bool saturate = true); |  | ||||||
| 
 |  | ||||||
|     /// Get image component value by type and size
 |  | ||||||
|     std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, |  | ||||||
|                                             u32 component_size, Node original_value); |  | ||||||
| 
 |  | ||||||
|     /// Returns a predicate comparing two floats
 |  | ||||||
|     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |  | ||||||
|     /// Returns a predicate comparing two integers
 |  | ||||||
|     Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, |  | ||||||
|                                        Node op_a, Node op_b); |  | ||||||
|     /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
 |  | ||||||
|     Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |  | ||||||
| 
 |  | ||||||
|     /// Returns a predicate combiner operation
 |  | ||||||
|     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |  | ||||||
| 
 |  | ||||||
|     /// Queries the missing sampler info from the execution context.
 |  | ||||||
|     SamplerInfo GetSamplerInfo(SamplerInfo info, |  | ||||||
|                                std::optional<Tegra::Engines::SamplerDescriptor> sampler); |  | ||||||
| 
 |  | ||||||
|     /// Accesses a texture sampler.
 |  | ||||||
|     std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); |  | ||||||
| 
 |  | ||||||
|     /// Accesses a texture sampler for a bindless texture.
 |  | ||||||
|     std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, |  | ||||||
|                                                    Node& index_var); |  | ||||||
| 
 |  | ||||||
|     /// Accesses an image.
 |  | ||||||
|     ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |  | ||||||
| 
 |  | ||||||
|     /// Access a bindless image sampler.
 |  | ||||||
|     ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); |  | ||||||
| 
 |  | ||||||
|     /// Extracts a sequence of bits from a node
 |  | ||||||
|     Node BitfieldExtract(Node value, u32 offset, u32 bits); |  | ||||||
| 
 |  | ||||||
|     /// Inserts a sequence of bits from a node
 |  | ||||||
|     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); |  | ||||||
| 
 |  | ||||||
|     /// Marks the usage of an input or output attribute.
 |  | ||||||
|     void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); |  | ||||||
| 
 |  | ||||||
|     /// Decodes VMNMX instruction and inserts its code into the passed basic block.
 |  | ||||||
|     void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); |  | ||||||
| 
 |  | ||||||
|     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |  | ||||||
|                                   const Node4& components); |  | ||||||
| 
 |  | ||||||
|     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |  | ||||||
|                                    const Node4& components, bool ignore_mask = false); |  | ||||||
|     void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |  | ||||||
|                                        const Node4& components, bool ignore_mask = false); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |  | ||||||
|                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |  | ||||||
|                      bool is_array, bool is_aoffi, |  | ||||||
|                      std::optional<Tegra::Shader::Register> bindless_reg); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |  | ||||||
|                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |  | ||||||
|                       bool is_array); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |  | ||||||
|                       bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, |  | ||||||
|                       bool is_bindless); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTldCode(Tegra::Shader::Instruction instr); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |  | ||||||
|                       bool is_array); |  | ||||||
| 
 |  | ||||||
|     std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( |  | ||||||
|         Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |  | ||||||
|         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); |  | ||||||
| 
 |  | ||||||
|     std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); |  | ||||||
| 
 |  | ||||||
|     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |  | ||||||
|                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |  | ||||||
|                          Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, |  | ||||||
|                          std::optional<Tegra::Shader::Register> bindless_reg); |  | ||||||
| 
 |  | ||||||
|     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |  | ||||||
|                          u64 byte_height); |  | ||||||
| 
 |  | ||||||
|     void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, |  | ||||||
|                              Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, |  | ||||||
|                              Tegra::Shader::PredicateResultMode predicate_mode, |  | ||||||
|                              Tegra::Shader::Pred predicate, bool sets_cc); |  | ||||||
|     void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |  | ||||||
|                               Node op_c, Node imm_lut, bool sets_cc); |  | ||||||
| 
 |  | ||||||
/// Follows `tracked` back through register assignments and operation operands in `code`,
/// starting the backwards search at `cursor`, until a constant buffer read with an immediate
/// offset is reached.
/// Returns {node, constant buffer index, immediate offset}, or a default-constructed tuple
/// when no such read is found.
|     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |  | ||||||
| 
 |  | ||||||
/// Resolves the sampler that the handle expression `tracked` refers to, searching `code`
/// backwards from `cursor` for register assignments when needed.
/// Returns the originating node together with its tracking info, or a default-constructed
/// pair when the expression cannot be resolved.
|     std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, |  | ||||||
|                                                        s64 cursor); |  | ||||||
| 
 |  | ||||||
/// Handles a sampler handle read from `cbuf` at `gpr + base_offset`.
/// Declares an amend that stores `gpr` divided by the guest driver's texture handler size in a
/// new custom variable, attaches that amend to `code[cursor]`, and returns `tracked` with an
/// array sampler tracking entry built from the buffer index, the immediate held by
/// `base_offset` and the new variable. `base_offset` must hold an ImmediateNode.
|     std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, |  | ||||||
|                                                              const OperationNode& operation, |  | ||||||
|                                                              Node gpr, Node base_offset, |  | ||||||
|                                                              Node tracked, const NodeBlock& code, |  | ||||||
|                                                              s64 cursor); |  | ||||||
| 
 |  | ||||||
/// Looks up the value last assigned to the register `tracked` before `cursor` in `code` and
/// returns it when it is an immediate; returns std::nullopt otherwise.
/// `tracked` must hold a GprNode.
|     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |  | ||||||
| 
 |  | ||||||
/// Searches `code` backwards from `cursor` for an assignment whose destination register has
/// the same index as `tracked`.
/// Returns {assigned value, position reported for the assignment}, or a default-constructed
/// pair when there is none.
|     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |  | ||||||
|                                        s64 cursor) const; |  | ||||||
| 
 |  | ||||||
|     std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, |  | ||||||
|                                                                Tegra::Shader::Instruction instr, |  | ||||||
|                                                                bool is_read, bool is_write); |  | ||||||
| 
 |  | ||||||
|     /// Register new amending code and obtain the reference id.
 |  | ||||||
|     std::size_t DeclareAmend(Node new_amend); |  | ||||||
| 
 |  | ||||||
|     u32 NewCustomVariable(); |  | ||||||
| 
 |  | ||||||
|     const ProgramCode& program_code; |  | ||||||
|     const u32 main_offset; |  | ||||||
|     const CompilerSettings settings; |  | ||||||
|     Registry& registry; |  | ||||||
| 
 |  | ||||||
|     bool decompiled{}; |  | ||||||
|     bool disable_flow_stack{}; |  | ||||||
| 
 |  | ||||||
|     u32 coverage_begin{}; |  | ||||||
|     u32 coverage_end{}; |  | ||||||
| 
 |  | ||||||
|     std::map<u32, NodeBlock> basic_blocks; |  | ||||||
|     NodeBlock global_code; |  | ||||||
|     ASTManager program_manager{true, true}; |  | ||||||
|     std::vector<Node> amend_code; |  | ||||||
|     u32 num_custom_variables{}; |  | ||||||
| 
 |  | ||||||
|     std::set<u32> used_registers; |  | ||||||
|     std::set<Tegra::Shader::Pred> used_predicates; |  | ||||||
|     std::set<Tegra::Shader::Attribute::Index> used_input_attributes; |  | ||||||
|     std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |  | ||||||
|     std::map<u32, ConstBuffer> used_cbufs; |  | ||||||
|     std::list<SamplerEntry> used_samplers; |  | ||||||
|     std::list<ImageEntry> used_images; |  | ||||||
|     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |  | ||||||
|     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |  | ||||||
|     bool uses_layer{}; |  | ||||||
|     bool uses_viewport_index{}; |  | ||||||
|     bool uses_point_size{}; |  | ||||||
|     bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
 |  | ||||||
|     bool uses_instance_id{}; |  | ||||||
|     bool uses_vertex_id{}; |  | ||||||
|     bool uses_legacy_varyings{}; |  | ||||||
|     bool uses_y_negate{}; |  | ||||||
|     bool uses_warps{}; |  | ||||||
|     bool uses_indexed_samplers{}; |  | ||||||
| 
 |  | ||||||
|     Tegra::Shader::Header header; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,236 +0,0 @@ | ||||||
| // Copyright 2018 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <utility> |  | ||||||
| #include <variant> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/node.h" |  | ||||||
| #include "video_core/shader/node_helper.h" |  | ||||||
| #include "video_core/shader/shader_ir.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |  | ||||||
|                                    OperationCode operation_code) { |  | ||||||
|     for (; cursor >= 0; --cursor) { |  | ||||||
|         Node node = code.at(cursor); |  | ||||||
| 
 |  | ||||||
|         if (const auto operation = std::get_if<OperationNode>(&*node)) { |  | ||||||
|             if (operation->GetCode() == operation_code) { |  | ||||||
|                 return {std::move(node), cursor}; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |  | ||||||
|             const auto& conditional_code = conditional->GetCode(); |  | ||||||
|             auto result = FindOperation( |  | ||||||
|                 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); |  | ||||||
|             auto& found = result.first; |  | ||||||
|             if (found) { |  | ||||||
|                 return {std::move(found), cursor}; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { |  | ||||||
|     if (operation.GetCode() != OperationCode::UAdd) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     Node gpr; |  | ||||||
|     Node offset; |  | ||||||
|     ASSERT(operation.GetOperandsCount() == 2); |  | ||||||
|     for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { |  | ||||||
|         Node operand = operation[i]; |  | ||||||
|         if (std::holds_alternative<ImmediateNode>(*operand)) { |  | ||||||
|             offset = operation[i]; |  | ||||||
|         } else if (std::holds_alternative<GprNode>(*operand)) { |  | ||||||
|             gpr = operation[i]; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (offset && gpr) { |  | ||||||
|         return std::make_pair(gpr, offset); |  | ||||||
|     } |  | ||||||
|     return std::nullopt; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool AmendNodeCv(std::size_t amend_index, Node node) { |  | ||||||
|     if (const auto operation = std::get_if<OperationNode>(&*node)) { |  | ||||||
|         operation->SetAmendIndex(amend_index); |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { |  | ||||||
|         conditional->SetAmendIndex(amend_index); |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, |  | ||||||
|                                                              s64 cursor) { |  | ||||||
|     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |  | ||||||
|         const u32 cbuf_index = cbuf->GetIndex(); |  | ||||||
| 
 |  | ||||||
|         // Constant buffer found, test if it's an immediate
 |  | ||||||
|         const auto& offset = cbuf->GetOffset(); |  | ||||||
|         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |  | ||||||
|             auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); |  | ||||||
|             return {tracked, track}; |  | ||||||
|         } |  | ||||||
|         if (const auto operation = std::get_if<OperationNode>(&*offset)) { |  | ||||||
|             const u32 bound_buffer = registry.GetBoundBuffer(); |  | ||||||
|             if (bound_buffer != cbuf_index) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|             if (const std::optional pair = DecoupleIndirectRead(*operation)) { |  | ||||||
|                 auto [gpr, base_offset] = *pair; |  | ||||||
|                 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, |  | ||||||
|                                                   code, cursor); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |  | ||||||
|         if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
 |  | ||||||
|         // register that it uses as operand
 |  | ||||||
|         const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |  | ||||||
|         if (!source) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         return TrackBindlessSampler(source, code, new_cursor); |  | ||||||
|     } |  | ||||||
|     if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |  | ||||||
|         const OperationNode& op = *operation; |  | ||||||
| 
 |  | ||||||
|         const OperationCode opcode = operation->GetCode(); |  | ||||||
|         if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { |  | ||||||
|             ASSERT(op.GetOperandsCount() == 2); |  | ||||||
|             auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); |  | ||||||
|             auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); |  | ||||||
|             if (node_a && node_b) { |  | ||||||
|                 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, |  | ||||||
|                                                                    std::pair{offset_a, offset_b}); |  | ||||||
|                 return {tracked, std::move(track)}; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         std::size_t i = op.GetOperandsCount(); |  | ||||||
|         while (i--) { |  | ||||||
|             if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { |  | ||||||
|                 // Constant buffer found in operand.
 |  | ||||||
|                 return found; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |  | ||||||
|         const auto& conditional_code = conditional->GetCode(); |  | ||||||
|         return TrackBindlessSampler(tracked, conditional_code, |  | ||||||
|                                     static_cast<s64>(conditional_code.size())); |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( |  | ||||||
|     const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, |  | ||||||
|     const NodeBlock& code, s64 cursor) { |  | ||||||
|     const auto offset_imm = std::get<ImmediateNode>(*base_offset); |  | ||||||
|     const auto& gpu_driver = registry.AccessGuestDriverProfile(); |  | ||||||
|     const u32 bindless_cv = NewCustomVariable(); |  | ||||||
|     const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); |  | ||||||
|     Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); |  | ||||||
| 
 |  | ||||||
|     Node cv_node = GetCustomVariable(bindless_cv); |  | ||||||
|     Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); |  | ||||||
|     const std::size_t amend_index = DeclareAmend(std::move(amend_op)); |  | ||||||
|     AmendNodeCv(amend_index, code[cursor]); |  | ||||||
| 
 |  | ||||||
|     // TODO: Implement bindless index custom variable
 |  | ||||||
|     auto track = |  | ||||||
|         MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); |  | ||||||
|     return {tracked, track}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |  | ||||||
|                                                s64 cursor) const { |  | ||||||
|     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |  | ||||||
|         // Constant buffer found, test if it's an immediate
 |  | ||||||
|         const auto& offset = cbuf->GetOffset(); |  | ||||||
|         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |  | ||||||
|             return {tracked, cbuf->GetIndex(), immediate->GetValue()}; |  | ||||||
|         } |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |  | ||||||
|         if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
 |  | ||||||
|         // register that it uses as operand
 |  | ||||||
|         const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |  | ||||||
|         if (!source) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         return TrackCbuf(source, code, new_cursor); |  | ||||||
|     } |  | ||||||
|     if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |  | ||||||
|         for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { |  | ||||||
|             if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { |  | ||||||
|                 // Cbuf found in operand.
 |  | ||||||
|                 return found; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |  | ||||||
|         const auto& conditional_code = conditional->GetCode(); |  | ||||||
|         return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |  | ||||||
|     // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
 |  | ||||||
|     // that it uses as operand
 |  | ||||||
|     const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); |  | ||||||
|     const auto& found = result.first; |  | ||||||
|     if (!found) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { |  | ||||||
|         return immediate->GetValue(); |  | ||||||
|     } |  | ||||||
|     return std::nullopt; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, |  | ||||||
|                                              s64 cursor) const { |  | ||||||
|     for (; cursor >= 0; --cursor) { |  | ||||||
|         const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); |  | ||||||
|         if (!found_node) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         const auto operation = std::get_if<OperationNode>(&*found_node); |  | ||||||
|         ASSERT(operation); |  | ||||||
| 
 |  | ||||||
|         const auto& target = (*operation)[0]; |  | ||||||
|         if (const auto gpr_target = std::get_if<GprNode>(&*target)) { |  | ||||||
|             if (gpr_target->GetIndex() == tracked->GetIndex()) { |  | ||||||
|                 return {(*operation)[1], new_cursor}; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     return {}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,115 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #include <algorithm> |  | ||||||
| #include <array> |  | ||||||
| #include <unordered_map> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| #include "video_core/shader/transform_feedback.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| 
 |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; |  | ||||||
| 
 |  | ||||||
| // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
 |  | ||||||
| 
 |  | ||||||
/// Attribute offsets that describe a vector.
/// Each entry is the location of the first component of an attribute; entries are four
/// locations apart within each contiguous group. BuildTransformFeedback rounds a varying
/// location down to a multiple of four and looks the result up in this table.
constexpr std::array VECTORS = {
    28,  // gl_Position
    32,  // Generic 0
    36,  // Generic 1
    40,  // Generic 2
    44,  // Generic 3
    48,  // Generic 4
    52,  // Generic 5
    56,  // Generic 6
    60,  // Generic 7
    64,  // Generic 8
    68,  // Generic 9
    72,  // Generic 10
    76,  // Generic 11
    80,  // Generic 12
    84,  // Generic 13
    88,  // Generic 14
    92,  // Generic 15
    96,  // Generic 16
    100, // Generic 17
    104, // Generic 18
    108, // Generic 19
    112, // Generic 20
    116, // Generic 21
    120, // Generic 22
    124, // Generic 23
    128, // Generic 24
    132, // Generic 25
    136, // Generic 26
    140, // Generic 27
    144, // Generic 28
    148, // Generic 29
    152, // Generic 30
    156, // Generic 31
    160, // gl_FrontColor
    164, // gl_FrontSecondaryColor
    168, // gl_BackColor (previously duplicated gl_FrontColor's 160)
    172, // gl_BackSecondaryColor (previously duplicated gl_FrontSecondaryColor's 164)
    192, // gl_TexCoord[0]
    196, // gl_TexCoord[1]
    200, // gl_TexCoord[2]
    204, // gl_TexCoord[3]
    208, // gl_TexCoord[4]
    212, // gl_TexCoord[5]
    216, // gl_TexCoord[6]
    220, // gl_TexCoord[7]
};
| } // namespace
 |  | ||||||
| 
 |  | ||||||
| std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { |  | ||||||
| 
 |  | ||||||
|     std::unordered_map<u8, VaryingTFB> tfb; |  | ||||||
| 
 |  | ||||||
|     for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { |  | ||||||
|         const auto& locations = info.tfb_varying_locs[buffer]; |  | ||||||
|         const auto& layout = info.tfb_layouts[buffer]; |  | ||||||
|         const std::size_t varying_count = layout.varying_count; |  | ||||||
| 
 |  | ||||||
|         std::size_t highest = 0; |  | ||||||
| 
 |  | ||||||
|         for (std::size_t offset = 0; offset < varying_count; ++offset) { |  | ||||||
|             const std::size_t base_offset = offset; |  | ||||||
|             const u8 location = locations[offset]; |  | ||||||
| 
 |  | ||||||
|             VaryingTFB varying; |  | ||||||
|             varying.buffer = layout.stream; |  | ||||||
|             varying.stride = layout.stride; |  | ||||||
|             varying.offset = offset * sizeof(u32); |  | ||||||
|             varying.components = 1; |  | ||||||
| 
 |  | ||||||
|             if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { |  | ||||||
|                 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); |  | ||||||
| 
 |  | ||||||
|                 const u8 base_index = location / 4; |  | ||||||
|                 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { |  | ||||||
|                     ++offset; |  | ||||||
|                     ++varying.components; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; |  | ||||||
|             UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); |  | ||||||
| 
 |  | ||||||
|             highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         UNIMPLEMENTED_IF(highest != layout.stride); |  | ||||||
|     } |  | ||||||
|     return tfb; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
|  | @ -1,23 +0,0 @@ | ||||||
| // Copyright 2020 yuzu Emulator Project
 |  | ||||||
| // Licensed under GPLv2 or any later version
 |  | ||||||
| // Refer to the license.txt file included.
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <unordered_map> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/shader/registry.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| 
 |  | ||||||
/// Describes where one varying is written by transform feedback.
/// All members are zero-initialized so a default-constructed object never carries
/// indeterminate values.
struct VaryingTFB {
    std::size_t buffer{};     ///< Taken from the layout's `stream` field by BuildTransformFeedback
    std::size_t stride{};     ///< Taken from the layout's `stride` field by BuildTransformFeedback
    std::size_t offset{};     ///< Index of the varying's first location times sizeof(u32)
    std::size_t components{}; ///< Number of consecutive locations merged into this varying
};
| 
 |  | ||||||
/// Builds a map from varying location to its transform feedback description (buffer, stride,
/// byte offset, component count) from the transform feedback layouts and varying locations
/// stored in `info`.
| std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCommon::Shader
 |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp