forked from eden-emu/eden
		
	async shaders
This commit is contained in:
		
							parent
							
								
									c783cf443e
								
							
						
					
					
						commit
						468bd9c1b0
					
				
					 16 changed files with 598 additions and 64 deletions
				
			
		|  | @ -98,6 +98,8 @@ add_library(video_core STATIC | ||||||
|     sampler_cache.cpp |     sampler_cache.cpp | ||||||
|     sampler_cache.h |     sampler_cache.h | ||||||
|     shader_cache.h |     shader_cache.h | ||||||
|  |     shader_notify.cpp | ||||||
|  |     shader_notify.h | ||||||
|     shader/decode/arithmetic.cpp |     shader/decode/arithmetic.cpp | ||||||
|     shader/decode/arithmetic_immediate.cpp |     shader/decode/arithmetic_immediate.cpp | ||||||
|     shader/decode/bfe.cpp |     shader/decode/bfe.cpp | ||||||
|  | @ -128,6 +130,8 @@ add_library(video_core STATIC | ||||||
|     shader/decode/other.cpp |     shader/decode/other.cpp | ||||||
|     shader/ast.cpp |     shader/ast.cpp | ||||||
|     shader/ast.h |     shader/ast.h | ||||||
|  |     shader/async_shaders.cpp | ||||||
|  |     shader/async_shaders.h | ||||||
|     shader/compiler_settings.cpp |     shader/compiler_settings.cpp | ||||||
|     shader/compiler_settings.h |     shader/compiler_settings.h | ||||||
|     shader/control_flow.cpp |     shader/control_flow.cpp | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
|  | #include "video_core/shader_notify.h" | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render | ||||||
|     kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |     kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | ||||||
|     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); | ||||||
|     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | ||||||
|  |     shader_notify = std::make_unique<VideoCore::ShaderNotify>(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GPU::~GPU() = default; | GPU::~GPU() = default; | ||||||
|  |  | ||||||
|  | @ -33,6 +33,7 @@ class System; | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| class RendererBase; | class RendererBase; | ||||||
|  | class ShaderNotify; | ||||||
| } // namespace VideoCore
 | } // namespace VideoCore
 | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -207,6 +208,14 @@ public: | ||||||
|         return *renderer; |         return *renderer; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     VideoCore::ShaderNotify& ShaderNotify() { | ||||||
|  |         return *shader_notify; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const VideoCore::ShaderNotify& ShaderNotify() const { | ||||||
|  |         return *shader_notify; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // Waits for the GPU to finish working
 |     // Waits for the GPU to finish working
 | ||||||
|     virtual void WaitIdle() const = 0; |     virtual void WaitIdle() const = 0; | ||||||
| 
 | 
 | ||||||
|  | @ -347,6 +356,8 @@ private: | ||||||
|     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||||||
|     /// Inline memory engine
 |     /// Inline memory engine
 | ||||||
|     std::unique_ptr<Engines::KeplerMemory> kepler_memory; |     std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||||||
|  |     /// Shader build notifier
 | ||||||
|  |     std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||||||
| 
 | 
 | ||||||
|     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; |     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -233,6 +233,8 @@ Device::Device() | ||||||
|                            GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && |                            GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && | ||||||
|                            GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; |                            GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; | ||||||
| 
 | 
 | ||||||
|  |     use_asynchronous_shaders = Settings::values.use_asynchronous_shaders; | ||||||
|  | 
 | ||||||
|     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | ||||||
|     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | ||||||
|     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | ||||||
|  |  | ||||||
|  | @ -104,6 +104,10 @@ public: | ||||||
|         return use_assembly_shaders; |         return use_assembly_shaders; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     bool UseAsynchronousShaders() const { | ||||||
|  |         return use_asynchronous_shaders; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     static bool TestVariableAoffi(); |     static bool TestVariableAoffi(); | ||||||
|     static bool TestPreciseBug(); |     static bool TestPreciseBug(); | ||||||
|  | @ -127,6 +131,7 @@ private: | ||||||
|     bool has_fast_buffer_sub_data{}; |     bool has_fast_buffer_sub_data{}; | ||||||
|     bool has_nv_viewport_array2{}; |     bool has_nv_viewport_array2{}; | ||||||
|     bool use_assembly_shaders{}; |     bool use_assembly_shaders{}; | ||||||
|  |     bool use_asynchronous_shaders{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | ||||||
|       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |       shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | ||||||
|       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |       buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | ||||||
|       fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |       fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | ||||||
|       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { |       screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, | ||||||
|  |       async_shaders{emu_window} { | ||||||
|     CheckExtensions(); |     CheckExtensions(); | ||||||
| 
 | 
 | ||||||
|     unified_uniform_buffer.Create(); |     unified_uniform_buffer.Create(); | ||||||
|  | @ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | ||||||
|                                  nullptr, 0); |                                  nullptr, 0); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     if (device.UseAsynchronousShaders()) { | ||||||
|  |         // Max worker threads we should allow
 | ||||||
|  |         constexpr auto MAX_THREADS = 8u; | ||||||
|  |         // Number of threads we should reserve for other parts of yuzu
 | ||||||
|  |         constexpr auto RESERVED_THREADS = 6u; | ||||||
|  |         // Get the number of threads we can use (hardware_concurrency() can return zero)
 | ||||||
|  |         const auto cpu_thread_count = | ||||||
|  |             std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); | ||||||
|  |         // Deduce how many "extra" threads we have to use.
 | ||||||
|  |         const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; | ||||||
|  |         // Always allow at least 1 thread regardless of our settings
 | ||||||
|  |         const auto max_worker_count = std::max(1u, max_threads_unused); | ||||||
|  |         // Don't use more than MAX_THREADS
 | ||||||
|  |         const auto worker_count = std::min(max_worker_count, MAX_THREADS); | ||||||
|  |         async_shaders.AllocateWorkers(worker_count); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| RasterizerOpenGL::~RasterizerOpenGL() { | RasterizerOpenGL::~RasterizerOpenGL() { | ||||||
|  | @ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         Shader* const shader = shader_cache.GetStageProgram(program); |         Shader* shader = shader_cache.GetStageProgram(program, async_shaders); | ||||||
| 
 | 
 | ||||||
|         if (device.UseAssemblyShaders()) { |         if (device.UseAssemblyShaders()) { | ||||||
|             // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
 |             // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
 | ||||||
|  | @ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | ||||||
|         SetupDrawTextures(stage, shader); |         SetupDrawTextures(stage, shader); | ||||||
|         SetupDrawImages(stage, shader); |         SetupDrawImages(stage, shader); | ||||||
| 
 | 
 | ||||||
|         const GLuint program_handle = shader->GetHandle(); |         const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | ||||||
|         switch (program) { |         switch (program) { | ||||||
|         case Maxwell::ShaderProgram::VertexA: |         case Maxwell::ShaderProgram::VertexA: | ||||||
|         case Maxwell::ShaderProgram::VertexB: |         case Maxwell::ShaderProgram::VertexB: | ||||||
|  |  | ||||||
|  | @ -33,6 +33,7 @@ | ||||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||||
| #include "video_core/renderer_opengl/utils.h" | #include "video_core/renderer_opengl/utils.h" | ||||||
|  | #include "video_core/shader/async_shaders.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
|  | @ -91,6 +92,14 @@ public: | ||||||
|         return num_queued_commands > 0; |         return num_queued_commands > 0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||||||
|  |         return async_shaders; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||||||
|  |         return async_shaders; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     /// Configures the color and depth framebuffer states.
 |     /// Configures the color and depth framebuffer states.
 | ||||||
|     void ConfigureFramebuffers(); |     void ConfigureFramebuffers(); | ||||||
|  | @ -242,6 +251,7 @@ private: | ||||||
|     ScreenInfo& screen_info; |     ScreenInfo& screen_info; | ||||||
|     ProgramManager& program_manager; |     ProgramManager& program_manager; | ||||||
|     StateTracker& state_tracker; |     StateTracker& state_tracker; | ||||||
|  |     VideoCommon::Shader::AsyncShaders async_shaders; | ||||||
| 
 | 
 | ||||||
|     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -177,6 +177,12 @@ public: | ||||||
|         Release(); |         Release(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { | ||||||
|  |         Release(); | ||||||
|  |         handle = std::exchange(o.handle, 0); | ||||||
|  |         return *this; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Deletes the internal OpenGL resource
 |     /// Deletes the internal OpenGL resource
 | ||||||
|     void Release(); |     void Release(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -31,6 +31,7 @@ | ||||||
| #include "video_core/shader/registry.h" | #include "video_core/shader/registry.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
|  | #include "video_core/shader_notify.h" | ||||||
| 
 | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
|  | @ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | ||||||
|     return registry; |     return registry; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::unordered_set<GLenum> GetSupportedFormats() { | ||||||
|  |     GLint num_formats; | ||||||
|  |     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||||||
|  | 
 | ||||||
|  |     std::vector<GLint> formats(num_formats); | ||||||
|  |     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||||||
|  | 
 | ||||||
|  |     std::unordered_set<GLenum> supported_formats; | ||||||
|  |     for (const GLint format : formats) { | ||||||
|  |         supported_formats.insert(static_cast<GLenum>(format)); | ||||||
|  |     } | ||||||
|  |     return supported_formats; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
| ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | ||||||
|                              const ShaderIR& ir, const Registry& registry, |                              const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { | ||||||
|                              bool hint_retrievable = false) { |  | ||||||
|     const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |     const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | ||||||
|     LOG_INFO(Render_OpenGL, "{}", shader_id); |     LOG_INFO(Render_OpenGL, "{}", shader_id); | ||||||
| 
 | 
 | ||||||
|  | @ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | ||||||
|     return program; |     return program; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::unordered_set<GLenum> GetSupportedFormats() { |  | ||||||
|     GLint num_formats; |  | ||||||
|     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |  | ||||||
| 
 |  | ||||||
|     std::vector<GLint> formats(num_formats); |  | ||||||
|     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); |  | ||||||
| 
 |  | ||||||
|     std::unordered_set<GLenum> supported_formats; |  | ||||||
|     for (const GLint format : formats) { |  | ||||||
|         supported_formats.insert(static_cast<GLenum>(format)); |  | ||||||
|     } |  | ||||||
|     return supported_formats; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 |  | ||||||
| Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, | Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, | ||||||
|                ProgramSharedPtr program_) |                ProgramSharedPtr program_, bool is_built) | ||||||
|     : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { |     : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, | ||||||
|  |       is_built(is_built) { | ||||||
|     handle = program->assembly_program.handle; |     handle = program->assembly_program.handle; | ||||||
|     if (handle == 0) { |     if (handle == 0) { | ||||||
|         handle = program->source_program.handle; |         handle = program->source_program.handle; | ||||||
|     } |     } | ||||||
|  |     if (is_built) { | ||||||
|         ASSERT(handle != 0); |         ASSERT(handle != 0); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Shader::~Shader() = default; | Shader::~Shader() = default; | ||||||
|  | @ -214,21 +217,42 @@ GLuint Shader::GetHandle() const { | ||||||
|     return handle; |     return handle; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, | bool Shader::IsBuilt() const { | ||||||
|                                                       Maxwell::ShaderProgram program_type, |     return is_built; | ||||||
|                                                       ProgramCode code, ProgramCode code_b) { | } | ||||||
|  | 
 | ||||||
|  | void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { | ||||||
|  |     program->source_program = std::move(new_program); | ||||||
|  |     handle = program->source_program.handle; | ||||||
|  |     is_built = true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { | ||||||
|  |     program->assembly_program = std::move(new_program); | ||||||
|  |     handle = program->assembly_program.handle; | ||||||
|  |     is_built = true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::unique_ptr<Shader> Shader::CreateStageFromMemory( | ||||||
|  |     const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, | ||||||
|  |     ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { | ||||||
|     const auto shader_type = GetShaderType(program_type); |     const auto shader_type = GetShaderType(program_type); | ||||||
|     const std::size_t size_in_bytes = code.size() * sizeof(u64); |     const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||||||
| 
 | 
 | ||||||
|     auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); |     auto& gpu = params.system.GPU(); | ||||||
|  |     gpu.ShaderNotify().MarkSharderBuilding(); | ||||||
|  | 
 | ||||||
|  |     auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); | ||||||
|  |     if (!async_shaders.IsShaderAsync(params.system.GPU()) || | ||||||
|  |         !params.device.UseAsynchronousShaders()) { | ||||||
|         const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |         const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||||||
|         // TODO(Rodrigo): Handle VertexA shaders
 |         // TODO(Rodrigo): Handle VertexA shaders
 | ||||||
|         // std::optional<ShaderIR> ir_b;
 |         // std::optional<ShaderIR> ir_b;
 | ||||||
|         // if (!code_b.empty()) {
 |         // if (!code_b.empty()) {
 | ||||||
|         //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
 |         //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
 | ||||||
|         // }
 |         // }
 | ||||||
|     auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |         auto program = | ||||||
| 
 |             BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||||||
|         ShaderDiskCacheEntry entry; |         ShaderDiskCacheEntry entry; | ||||||
|         entry.type = shader_type; |         entry.type = shader_type; | ||||||
|         entry.code = std::move(code); |         entry.code = std::move(code); | ||||||
|  | @ -241,15 +265,34 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& pa | ||||||
|         entry.bindless_samplers = registry->GetBindlessSamplers(); |         entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||||
|         params.disk_cache.SaveEntry(std::move(entry)); |         params.disk_cache.SaveEntry(std::move(entry)); | ||||||
| 
 | 
 | ||||||
|     return std::unique_ptr<Shader>(new Shader( |         gpu.ShaderNotify().MarkShaderComplete(); | ||||||
|         std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); | 
 | ||||||
|  |         return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||||||
|  |                                                   MakeEntries(params.device, ir, shader_type), | ||||||
|  |                                                   std::move(program), true)); | ||||||
|  |     } else { | ||||||
|  |         // Required for entries
 | ||||||
|  |         const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||||||
|  |         auto entries = MakeEntries(params.device, ir, shader_type); | ||||||
|  | 
 | ||||||
|  |         async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, | ||||||
|  |                                         std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, | ||||||
|  |                                         COMPILER_SETTINGS, *registry, cpu_addr); | ||||||
|  | 
 | ||||||
|  |         auto program = std::make_shared<ProgramHandle>(); | ||||||
|  |         return std::unique_ptr<Shader>( | ||||||
|  |             new Shader(std::move(registry), std::move(entries), std::move(program), false)); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | ||||||
|                                                        ProgramCode code) { |                                                        ProgramCode code) { | ||||||
|     const std::size_t size_in_bytes = code.size() * sizeof(u64); |     const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||||||
| 
 | 
 | ||||||
|     auto& engine = params.system.GPU().KeplerCompute(); |     auto& gpu = params.system.GPU(); | ||||||
|  |     gpu.ShaderNotify().MarkSharderBuilding(); | ||||||
|  | 
 | ||||||
|  |     auto& engine = gpu.KeplerCompute(); | ||||||
|     auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |     auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); | ||||||
|     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||||||
|     const u64 uid = params.unique_identifier; |     const u64 uid = params.unique_identifier; | ||||||
|  | @ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p | ||||||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); |     entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||||
|     params.disk_cache.SaveEntry(std::move(entry)); |     params.disk_cache.SaveEntry(std::move(entry)); | ||||||
| 
 | 
 | ||||||
|  |     gpu.ShaderNotify().MarkShaderComplete(); | ||||||
|  | 
 | ||||||
|     return std::unique_ptr<Shader>(new Shader(std::move(registry), |     return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||||||
|                                               MakeEntries(params.device, ir, ShaderType::Compute), |                                               MakeEntries(params.device, ir, ShaderType::Compute), | ||||||
|                                               std::move(program))); |                                               std::move(program))); | ||||||
|  | @ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||||||
|     return program; |     return program; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, | ||||||
|  |                                            VideoCommon::Shader::AsyncShaders& async_shaders) { | ||||||
|     if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { |     if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { | ||||||
|         return last_shaders[static_cast<std::size_t>(program)]; |         auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; | ||||||
|  |         if (last_shader->IsBuilt()) { | ||||||
|  |             return last_shader; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     auto& memory_manager{system.GPU().MemoryManager()}; |     auto& memory_manager{system.GPU().MemoryManager()}; | ||||||
|     const GPUVAddr address{GetShaderAddress(system, program)}; |     const GPUVAddr address{GetShaderAddress(system, program)}; | ||||||
| 
 | 
 | ||||||
|  |     if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { | ||||||
|  |         auto completed_work = async_shaders.GetCompletedWork(); | ||||||
|  |         for (auto& work : completed_work) { | ||||||
|  |             Shader* shader = TryGet(work.cpu_address); | ||||||
|  |             auto& gpu = system.GPU(); | ||||||
|  |             gpu.ShaderNotify().MarkShaderComplete(); | ||||||
|  |             if (shader == nullptr) { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             using namespace VideoCommon::Shader; | ||||||
|  |             if (work.backend == AsyncShaders::Backend::OpenGL) { | ||||||
|  |                 shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); | ||||||
|  |             } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||||||
|  |                 shader->AsyncGLASMBuilt(std::move(work.program.glasm)); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             ShaderDiskCacheEntry entry; | ||||||
|  |             entry.type = work.shader_type; | ||||||
|  |             entry.code = std::move(work.code); | ||||||
|  |             entry.code_b = std::move(work.code_b); | ||||||
|  |             entry.unique_identifier = work.uid; | ||||||
|  | 
 | ||||||
|  |             auto& registry = shader->GetRegistry(); | ||||||
|  | 
 | ||||||
|  |             entry.bound_buffer = registry.GetBoundBuffer(); | ||||||
|  |             entry.graphics_info = registry.GetGraphicsInfo(); | ||||||
|  |             entry.keys = registry.GetKeys(); | ||||||
|  |             entry.bound_samplers = registry.GetBoundSamplers(); | ||||||
|  |             entry.bindless_samplers = registry.GetBindlessSamplers(); | ||||||
|  |             disk_cache.SaveEntry(std::move(entry)); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // Look up shader in the cache based on address
 |     // Look up shader in the cache based on address
 | ||||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | ||||||
|     if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { |     if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { | ||||||
|  | @ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||||
|     std::unique_ptr<Shader> shader; |     std::unique_ptr<Shader> shader; | ||||||
|     const auto found = runtime_cache.find(unique_identifier); |     const auto found = runtime_cache.find(unique_identifier); | ||||||
|     if (found == runtime_cache.end()) { |     if (found == runtime_cache.end()) { | ||||||
|         shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); |         shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), | ||||||
|  |                                                async_shaders, cpu_addr.value_or(0)); | ||||||
|     } else { |     } else { | ||||||
|         shader = Shader::CreateFromCache(params, found->second); |         shader = Shader::CreateFromCache(params, found->second); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -33,6 +33,10 @@ namespace Core::Frontend { | ||||||
| class EmuWindow; | class EmuWindow; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | namespace VideoCommon::Shader { | ||||||
|  | class AsyncShaders; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| class Device; | class Device; | ||||||
|  | @ -61,6 +65,11 @@ struct ShaderParameters { | ||||||
|     u64 unique_identifier; |     u64 unique_identifier; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, | ||||||
|  |                              u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, | ||||||
|  |                              const VideoCommon::Shader::Registry& registry, | ||||||
|  |                              bool hint_retrievable = false); | ||||||
|  | 
 | ||||||
| class Shader final { | class Shader final { | ||||||
| public: | public: | ||||||
|     ~Shader(); |     ~Shader(); | ||||||
|  | @ -68,15 +77,28 @@ public: | ||||||
|     /// Gets the GL program handle for the shader
 |     /// Gets the GL program handle for the shader
 | ||||||
|     GLuint GetHandle() const; |     GLuint GetHandle() const; | ||||||
| 
 | 
 | ||||||
|  |     bool IsBuilt() const; | ||||||
|  | 
 | ||||||
|     /// Gets the shader entries for the shader
 |     /// Gets the shader entries for the shader
 | ||||||
|     const ShaderEntries& GetEntries() const { |     const ShaderEntries& GetEntries() const { | ||||||
|         return entries; |         return entries; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, |     const VideoCommon::Shader::Registry& GetRegistry() const { | ||||||
|                                                          Maxwell::ShaderProgram program_type, |         return *registry; | ||||||
|                                                          ProgramCode program_code, |     } | ||||||
|                                                          ProgramCode program_code_b); | 
 | ||||||
|  |     /// Mark an OpenGL shader as built
 | ||||||
|  |     void AsyncOpenGLBuilt(OGLProgram new_program); | ||||||
|  | 
 | ||||||
|  |     /// Mark a GLASM shader as built
 | ||||||
|  |     void AsyncGLASMBuilt(OGLAssemblyProgram new_program); | ||||||
|  | 
 | ||||||
|  |     static std::unique_ptr<Shader> CreateStageFromMemory( | ||||||
|  |         const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||||||
|  |         ProgramCode program_code, ProgramCode program_code_b, | ||||||
|  |         VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); | ||||||
|  | 
 | ||||||
|     static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, |     static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | ||||||
|                                                           ProgramCode code); |                                                           ProgramCode code); | ||||||
| 
 | 
 | ||||||
|  | @ -85,12 +107,13 @@ public: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, |     explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, | ||||||
|                     ProgramSharedPtr program); |                     ProgramSharedPtr program, bool is_built = true); | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<VideoCommon::Shader::Registry> registry; |     std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||||||
|     ShaderEntries entries; |     ShaderEntries entries; | ||||||
|     ProgramSharedPtr program; |     ProgramSharedPtr program; | ||||||
|     GLuint handle = 0; |     GLuint handle = 0; | ||||||
|  |     bool is_built{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | ||||||
|  | @ -104,7 +127,8 @@ public: | ||||||
|                        const VideoCore::DiskResourceLoadCallback& callback); |                        const VideoCore::DiskResourceLoadCallback& callback); | ||||||
| 
 | 
 | ||||||
|     /// Gets the current specified shader stage program
 |     /// Gets the current specified shader stage program
 | ||||||
|     Shader* GetStageProgram(Maxwell::ShaderProgram program); |     Shader* GetStageProgram(Maxwell::ShaderProgram program, | ||||||
|  |                             VideoCommon::Shader::AsyncShaders& async_shaders); | ||||||
| 
 | 
 | ||||||
|     /// Gets a compute kernel in the passed address
 |     /// Gets a compute kernel in the passed address
 | ||||||
|     Shader* GetComputeKernel(GPUVAddr code_addr); |     Shader* GetComputeKernel(GPUVAddr code_addr); | ||||||
|  |  | ||||||
							
								
								
									
										170
									
								
								src/video_core/shader/async_shaders.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								src/video_core/shader/async_shaders.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,170 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <chrono> | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/renderer_base.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||||||
|  | #include "video_core/shader/async_shaders.h" | ||||||
|  | 
 | ||||||
|  | namespace VideoCommon::Shader { | ||||||
|  | AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} | ||||||
|  | AsyncShaders::~AsyncShaders() { | ||||||
|  |     KillWorkers(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void AsyncShaders::AllocateWorkers(std::size_t num_workers) { | ||||||
|  |     // If we're already have workers queued or don't want to queue workers, ignore
 | ||||||
|  |     if (num_workers == worker_threads.size() || num_workers == 0) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // If workers already exist, clear them
 | ||||||
|  |     if (!worker_threads.empty()) { | ||||||
|  |         FreeWorkers(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Create workers
 | ||||||
|  |     for (std::size_t i = 0; i < num_workers; i++) { | ||||||
|  |         context_list.push_back(emu_window.CreateSharedContext()); | ||||||
|  |         worker_threads.push_back(std::move( | ||||||
|  |             std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void AsyncShaders::FreeWorkers() { | ||||||
|  |     // Mark all threads to quit
 | ||||||
|  |     is_thread_exiting.store(true); | ||||||
|  |     for (auto& thread : worker_threads) { | ||||||
|  |         thread.join(); | ||||||
|  |     } | ||||||
|  |     // Clear our shared contexts
 | ||||||
|  |     context_list.clear(); | ||||||
|  | 
 | ||||||
|  |     // Clear our worker threads
 | ||||||
|  |     worker_threads.clear(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void AsyncShaders::KillWorkers() { | ||||||
|  |     is_thread_exiting.store(true); | ||||||
|  |     for (auto& thread : worker_threads) { | ||||||
|  |         thread.detach(); | ||||||
|  |     } | ||||||
|  |     // Clear our shared contexts
 | ||||||
|  |     context_list.clear(); | ||||||
|  | 
 | ||||||
|  |     // Clear our worker threads
 | ||||||
|  |     worker_threads.clear(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool AsyncShaders::HasWorkQueued() { | ||||||
|  |     std::shared_lock lock(queue_mutex); | ||||||
|  |     return !pending_queue.empty(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool AsyncShaders::HasCompletedWork() { | ||||||
|  |     std::shared_lock lock(completed_mutex); | ||||||
|  |     return !finished_work.empty(); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { | ||||||
|  |     const auto& regs = gpu.Maxwell3D().regs; | ||||||
|  | 
 | ||||||
|  |     // If something is using depth, we can assume that games are not rendering anything which will
 | ||||||
|  |     // be used one time.
 | ||||||
|  |     if (regs.zeta_enable) { | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // If games are using a small index count, we can assume these are full screen quads. Usually
 | ||||||
|  |     // these shaders are only used once for building textures so we can assume they can't be built
 | ||||||
|  |     // async
 | ||||||
|  |     if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { | ||||||
|  |     std::vector<AsyncShaders::Result> results; | ||||||
|  |     { | ||||||
|  |         std::unique_lock lock(completed_mutex); | ||||||
|  |         results.assign(std::make_move_iterator(finished_work.begin()), | ||||||
|  |                        std::make_move_iterator(finished_work.end())); | ||||||
|  |         finished_work.clear(); | ||||||
|  |     } | ||||||
|  |     return results; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | ||||||
|  |                                      Tegra::Engines::ShaderType shader_type, u64 uid, | ||||||
|  |                                      std::vector<u64> code, std::vector<u64> code_b, | ||||||
|  |                                      u32 main_offset, | ||||||
|  |                                      VideoCommon::Shader::CompilerSettings compiler_settings, | ||||||
|  |                                      const VideoCommon::Shader::Registry& registry, | ||||||
|  |                                      VAddr cpu_addr) { | ||||||
|  |     WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM | ||||||
|  |                                                     : AsyncShaders::Backend::OpenGL, | ||||||
|  |                         device, | ||||||
|  |                         shader_type, | ||||||
|  |                         uid, | ||||||
|  |                         std::move(code), | ||||||
|  |                         std::move(code_b), | ||||||
|  |                         main_offset, | ||||||
|  |                         compiler_settings, | ||||||
|  |                         registry, | ||||||
|  |                         cpu_addr}; | ||||||
|  |     std::unique_lock lock(queue_mutex); | ||||||
|  |     pending_queue.push_back(std::move(params)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||||||
|  |     using namespace std::chrono_literals; | ||||||
|  |     while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||||||
|  |         // Partial lock to allow all threads to read at the same time
 | ||||||
|  |         if (!HasWorkQueued()) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         // Complete lock for pulling workload
 | ||||||
|  |         queue_mutex.lock(); | ||||||
|  |         // Another thread beat us, just unlock and wait for the next load
 | ||||||
|  |         if (pending_queue.empty()) { | ||||||
|  |             queue_mutex.unlock(); | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         // Pull work from queue
 | ||||||
|  |         WorkerParams work = std::move(pending_queue.front()); | ||||||
|  |         pending_queue.pop_front(); | ||||||
|  |         queue_mutex.unlock(); | ||||||
|  | 
 | ||||||
|  |         if (work.backend == AsyncShaders::Backend::OpenGL || | ||||||
|  |             work.backend == AsyncShaders::Backend::GLASM) { | ||||||
|  |             const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); | ||||||
|  |             const auto scope = context->Acquire(); | ||||||
|  |             auto program = | ||||||
|  |                 OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); | ||||||
|  |             Result result{}; | ||||||
|  |             result.backend = work.backend; | ||||||
|  |             result.cpu_address = work.cpu_address; | ||||||
|  |             result.uid = work.uid; | ||||||
|  |             result.code = std::move(work.code); | ||||||
|  |             result.code_b = std::move(work.code_b); | ||||||
|  |             result.shader_type = work.shader_type; | ||||||
|  | 
 | ||||||
|  |             if (work.backend == AsyncShaders::Backend::OpenGL) { | ||||||
|  |                 result.program.opengl = std::move(program->source_program); | ||||||
|  |             } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||||||
|  |                 result.program.glasm = std::move(program->assembly_program); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             { | ||||||
|  |                 std::unique_lock complete_lock(completed_mutex); | ||||||
|  |                 finished_work.push_back(std::move(result)); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCommon::Shader
 | ||||||
							
								
								
									
										107
									
								
								src/video_core/shader/async_shaders.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								src/video_core/shader/async_shaders.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,107 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <deque> | ||||||
|  | #include <memory> | ||||||
|  | #include <shared_mutex> | ||||||
|  | #include <thread> | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_device.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
|  | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
|  | 
 | ||||||
|  | namespace Core::Frontend { | ||||||
|  | class EmuWindow; | ||||||
|  | class GraphicsContext; | ||||||
|  | } // namespace Core::Frontend
 | ||||||
|  | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | class GPU; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | namespace VideoCommon::Shader { | ||||||
|  | 
 | ||||||
|  | class AsyncShaders { | ||||||
|  | public: | ||||||
|  |     enum class Backend { | ||||||
|  |         OpenGL, | ||||||
|  |         GLASM, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     struct ResultPrograms { | ||||||
|  |         OpenGL::OGLProgram opengl; | ||||||
|  |         OpenGL::OGLAssemblyProgram glasm; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     struct Result { | ||||||
|  |         u64 uid; | ||||||
|  |         VAddr cpu_address; | ||||||
|  |         Backend backend; | ||||||
|  |         ResultPrograms program; | ||||||
|  |         std::vector<u64> code; | ||||||
|  |         std::vector<u64> code_b; | ||||||
|  |         Tegra::Engines::ShaderType shader_type; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); | ||||||
|  |     ~AsyncShaders(); | ||||||
|  | 
 | ||||||
|  |     /// Start up shader worker threads
 | ||||||
|  |     void AllocateWorkers(std::size_t num_workers); | ||||||
|  | 
 | ||||||
|  |     /// Clear the shader queue and kill all worker threads
 | ||||||
|  |     void FreeWorkers(); | ||||||
|  | 
 | ||||||
|  |     // Force end all threads
 | ||||||
|  |     void KillWorkers(); | ||||||
|  | 
 | ||||||
|  |     /// Check our worker queue to see if we have any work queued already
 | ||||||
|  |     bool HasWorkQueued(); | ||||||
|  | 
 | ||||||
|  |     /// Check to see if any shaders have actually been compiled
 | ||||||
|  |     bool HasCompletedWork(); | ||||||
|  | 
 | ||||||
|  |     /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
 | ||||||
|  |     /// every shader async as some shaders are only built and executed once. We try to "guess" which
 | ||||||
|  |     /// shader would be used only once
 | ||||||
|  |     bool IsShaderAsync(const Tegra::GPU& gpu) const; | ||||||
|  | 
 | ||||||
|  |     /// Pulls completed compiled shaders
 | ||||||
|  |     std::vector<Result> GetCompletedWork(); | ||||||
|  | 
 | ||||||
|  |     void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, | ||||||
|  |                            u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, | ||||||
|  |                            VideoCommon::Shader::CompilerSettings compiler_settings, | ||||||
|  |                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | ||||||
|  | 
 | ||||||
|  |     struct WorkerParams { | ||||||
|  |         AsyncShaders::Backend backend; | ||||||
|  |         OpenGL::Device device; | ||||||
|  |         Tegra::Engines::ShaderType shader_type; | ||||||
|  |         u64 uid; | ||||||
|  |         std::vector<u64> code; | ||||||
|  |         std::vector<u64> code_b; | ||||||
|  |         u32 main_offset; | ||||||
|  |         VideoCommon::Shader::CompilerSettings compiler_settings; | ||||||
|  |         VideoCommon::Shader::Registry registry; | ||||||
|  |         VAddr cpu_address; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     std::shared_mutex queue_mutex; | ||||||
|  |     std::shared_mutex completed_mutex; | ||||||
|  |     std::atomic<bool> is_thread_exiting{}; | ||||||
|  |     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; | ||||||
|  |     std::vector<std::thread> worker_threads; | ||||||
|  |     std::deque<WorkerParams> pending_queue; | ||||||
|  |     std::vector<AsyncShaders::Result> finished_work; | ||||||
|  |     Core::Frontend::EmuWindow& emu_window; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCommon::Shader
 | ||||||
							
								
								
									
										42
									
								
								src/video_core/shader_notify.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/video_core/shader_notify.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,42 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "video_core/shader_notify.h" | ||||||
|  | 
 | ||||||
|  | using namespace std::chrono_literals; | ||||||
|  | 
 | ||||||
|  | namespace VideoCore { | ||||||
|  | namespace { | ||||||
|  | constexpr auto UPDATE_TICK = 32ms; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | ShaderNotify::ShaderNotify() = default; | ||||||
|  | ShaderNotify::~ShaderNotify() = default; | ||||||
|  | 
 | ||||||
|  | std::size_t ShaderNotify::GetShadersBuilding() { | ||||||
|  |     const auto now = std::chrono::high_resolution_clock::now(); | ||||||
|  |     const auto diff = now - last_update; | ||||||
|  |     if (diff > UPDATE_TICK) { | ||||||
|  |         std::shared_lock lock(mutex); | ||||||
|  |         last_updated_count = accurate_count; | ||||||
|  |     } | ||||||
|  |     return last_updated_count; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::size_t ShaderNotify::GetShadersBuildingAccurate() { | ||||||
|  |     std::shared_lock lock(mutex); | ||||||
|  |     return accurate_count; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ShaderNotify::MarkShaderComplete() { | ||||||
|  |     std::unique_lock lock(mutex); | ||||||
|  |     accurate_count--; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ShaderNotify::MarkSharderBuilding() { | ||||||
|  |     std::unique_lock lock(mutex); | ||||||
|  |     accurate_count++; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCore
 | ||||||
							
								
								
									
										29
									
								
								src/video_core/shader_notify.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/video_core/shader_notify.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <chrono> | ||||||
|  | #include <shared_mutex> | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace VideoCore { | ||||||
/// Counter of shaders that are currently being built, shared between the code that builds
/// shaders and the code that displays the number.
class ShaderNotify {
public:
    ShaderNotify();
    ~ShaderNotify();

    /// Number of shaders being built, taken from a periodically refreshed sample
    std::size_t GetShadersBuilding();

    /// Number of shaders being built, read from the live counter
    std::size_t GetShadersBuildingAccurate();

    /// Decrements the live counter
    void MarkShaderComplete();

    /// Increments the live counter
    void MarkSharderBuilding();

private:
    std::size_t last_updated_count{}; ///< Value handed out by GetShadersBuilding()
    std::size_t accurate_count{};     ///< Live counter, accessed under `mutex`
    std::shared_mutex mutex;
    std::chrono::high_resolution_clock::time_point last_update{};
};
|  | } // namespace VideoCore
 | ||||||
|  | @ -94,6 +94,8 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | ||||||
| #include "core/perf_stats.h" | #include "core/perf_stats.h" | ||||||
| #include "core/settings.h" | #include "core/settings.h" | ||||||
| #include "core/telemetry_session.h" | #include "core/telemetry_session.h" | ||||||
|  | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/shader_notify.h" | ||||||
| #include "yuzu/about_dialog.h" | #include "yuzu/about_dialog.h" | ||||||
| #include "yuzu/bootmanager.h" | #include "yuzu/bootmanager.h" | ||||||
| #include "yuzu/compatdb.h" | #include "yuzu/compatdb.h" | ||||||
|  | @ -498,6 +500,8 @@ void GMainWindow::InitializeWidgets() { | ||||||
|     message_label->setAlignment(Qt::AlignLeft); |     message_label->setAlignment(Qt::AlignLeft); | ||||||
|     statusBar()->addPermanentWidget(message_label, 1); |     statusBar()->addPermanentWidget(message_label, 1); | ||||||
| 
 | 
 | ||||||
|  |     shader_building_label = new QLabel(); | ||||||
|  |     shader_building_label->setToolTip(tr("The amount of shaders currently being built")); | ||||||
|     emu_speed_label = new QLabel(); |     emu_speed_label = new QLabel(); | ||||||
|     emu_speed_label->setToolTip( |     emu_speed_label->setToolTip( | ||||||
|         tr("Current emulation speed. Values higher or lower than 100% " |         tr("Current emulation speed. Values higher or lower than 100% " | ||||||
|  | @ -510,7 +514,8 @@ void GMainWindow::InitializeWidgets() { | ||||||
|         tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " |         tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " | ||||||
|            "full-speed emulation this should be at most 16.67 ms.")); |            "full-speed emulation this should be at most 16.67 ms.")); | ||||||
| 
 | 
 | ||||||
|     for (auto& label : {emu_speed_label, game_fps_label, emu_frametime_label}) { |     for (auto& label : | ||||||
|  |          {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { | ||||||
|         label->setVisible(false); |         label->setVisible(false); | ||||||
|         label->setFrameStyle(QFrame::NoFrame); |         label->setFrameStyle(QFrame::NoFrame); | ||||||
|         label->setContentsMargins(4, 0, 4, 0); |         label->setContentsMargins(4, 0, 4, 0); | ||||||
|  | @ -1176,6 +1181,7 @@ void GMainWindow::ShutdownGame() { | ||||||
| 
 | 
 | ||||||
|     // Disable status bar updates
 |     // Disable status bar updates
 | ||||||
|     status_bar_update_timer.stop(); |     status_bar_update_timer.stop(); | ||||||
|  |     shader_building_label->setVisible(false); | ||||||
|     emu_speed_label->setVisible(false); |     emu_speed_label->setVisible(false); | ||||||
|     game_fps_label->setVisible(false); |     game_fps_label->setVisible(false); | ||||||
|     emu_frametime_label->setVisible(false); |     emu_frametime_label->setVisible(false); | ||||||
|  | @ -2186,6 +2192,17 @@ void GMainWindow::UpdateStatusBar() { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     auto results = Core::System::GetInstance().GetAndResetPerfStats(); |     auto results = Core::System::GetInstance().GetAndResetPerfStats(); | ||||||
|  |     auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); | ||||||
|  |     const auto shaders_building = shader_notify.GetShadersBuilding(); | ||||||
|  | 
 | ||||||
|  |     if (shaders_building != 0) { | ||||||
|  |         shader_building_label->setText( | ||||||
|  |             tr("Building: %1 shader").arg(shaders_building) + | ||||||
|  |             (shaders_building != 1 ? QString::fromStdString("s") : QString::fromStdString(""))); | ||||||
|  |         shader_building_label->setVisible(true); | ||||||
|  |     } else { | ||||||
|  |         shader_building_label->setVisible(false); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     if (Settings::values.use_frame_limit.GetValue()) { |     if (Settings::values.use_frame_limit.GetValue()) { | ||||||
|         emu_speed_label->setText(tr("Speed: %1% / %2%") |         emu_speed_label->setText(tr("Speed: %1% / %2%") | ||||||
|  | @ -2315,9 +2332,12 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | ||||||
|     if (behavior == ReinitializeKeyBehavior::Warning) { |     if (behavior == ReinitializeKeyBehavior::Warning) { | ||||||
|         const auto res = QMessageBox::information( |         const auto res = QMessageBox::information( | ||||||
|             this, tr("Confirm Key Rederivation"), |             this, tr("Confirm Key Rederivation"), | ||||||
|             tr("You are about to force rederive all of your keys. \nIf you do not know what this " |             tr("You are about to force rederive all of your keys. \nIf you do not know what " | ||||||
|                "means or what you are doing, \nthis is a potentially destructive action. \nPlease " |                "this " | ||||||
|                "make sure this is what you want \nand optionally make backups.\n\nThis will delete " |                "means or what you are doing, \nthis is a potentially destructive action. " | ||||||
|  |                "\nPlease " | ||||||
|  |                "make sure this is what you want \nand optionally make backups.\n\nThis will " | ||||||
|  |                "delete " | ||||||
|                "your autogenerated key files and re-run the key derivation module."), |                "your autogenerated key files and re-run the key derivation module."), | ||||||
|             QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel}); |             QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel}); | ||||||
| 
 | 
 | ||||||
|  | @ -2628,8 +2648,8 @@ int main(int argc, char* argv[]) { | ||||||
| 
 | 
 | ||||||
| #ifdef __APPLE__ | #ifdef __APPLE__ | ||||||
|     // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/".
 |     // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/".
 | ||||||
|     // But since we require the working directory to be the executable path for the location of the
 |     // But since we require the working directory to be the executable path for the location of
 | ||||||
|     // user folder in the Qt Frontend, we need to cd into that working directory
 |     // the user folder in the Qt Frontend, we need to cd into that working directory
 | ||||||
|     const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; |     const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; | ||||||
|     chdir(bin_path.c_str()); |     chdir(bin_path.c_str()); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | @ -248,6 +248,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     // Status bar elements
 |     // Status bar elements
 | ||||||
|     QLabel* message_label = nullptr; |     QLabel* message_label = nullptr; | ||||||
|  |     QLabel* shader_building_label = nullptr; | ||||||
|     QLabel* emu_speed_label = nullptr; |     QLabel* emu_speed_label = nullptr; | ||||||
|     QLabel* game_fps_label = nullptr; |     QLabel* game_fps_label = nullptr; | ||||||
|     QLabel* emu_frametime_label = nullptr; |     QLabel* emu_frametime_label = nullptr; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 David Marcec
						David Marcec