forked from eden-emu/eden
		
	OpenGL: Add Local Memory warmup shader
This commit is contained in:
		
							parent
							
								
									3a991f3aef
								
							
						
					
					
						commit
						82107b33a2
					
				
					 5 changed files with 62 additions and 1 deletions
				
			
		|  | @ -33,6 +33,7 @@ set(SHADER_FILES | |||
|     opengl_fidelityfx_fsr.frag | ||||
|     opengl_fidelityfx_fsr_easu.frag | ||||
|     opengl_fidelityfx_fsr_rcas.frag | ||||
|     opengl_lmem_warmup.comp | ||||
|     opengl_present.frag | ||||
|     opengl_present.vert | ||||
|     opengl_present_scaleforce.frag | ||||
|  |  | |||
							
								
								
									
										47
									
								
								src/video_core/host_shaders/opengl_lmem_warmup.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								src/video_core/host_shaders/opengl_lmem_warmup.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,47 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
| 
 | ||||
| // This shader is a workaround for a quirk in NVIDIA OpenGL drivers. | ||||
| // Shaders using local memory see a great performance benefit if a shader that was dispatched | ||||
| // before them had more local memory allocated. | ||||
| // This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that | ||||
| // subsequent shaders see the performance boost. | ||||
| 
 | ||||
| // NOTE: This shader does no actual meaningful work and returns immediately; | ||||
| // it is simply a means to have the driver expect a shader using lots of local memory. | ||||
| 
 | ||||
| #version 450 | ||||
| 
 | ||||
| layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; | ||||
| 
 | ||||
| layout(location = 0) uniform uint uniform_data; | ||||
| 
 | ||||
| layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image; | ||||
| 
 | ||||
| #define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler | ||||
| #define NUM_LMEM_CONSTANTS 1 | ||||
| // Parenthesized so the macro expands to a single value even inside a larger expression | ||||
| // (e.g. 2 * ARRAY_SIZE); the unparenthesized form would expand to 2 * 4080 - 1. | ||||
| #define ARRAY_SIZE (MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS) | ||||
| 
 | ||||
| uint lmem_0[ARRAY_SIZE]; | ||||
| const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0)); | ||||
| 
 | ||||
| void main() { | ||||
|     const uint global_id = gl_GlobalInvocationID.x; | ||||
|     if (global_id <= 128) { | ||||
|         // The warmup is dispatched as 1x1x1 with a 1x1x1 local size, so global_id is always 0: | ||||
|         // this branch is always taken and none of the code below actually executes. | ||||
|         return; | ||||
|     } | ||||
|     for (uint t = 0; t < uniform_data; t++) { | ||||
|         const uint offset = (t * uniform_data); | ||||
|         lmem_0[offset] = t; | ||||
|     } | ||||
|     const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x); | ||||
|     const uint value = lmem_0[offset]; | ||||
|     const uint const_value = constant_values[offset / 4][offset % 4]; | ||||
|     const uvec4 color = uvec4(value + const_value); | ||||
| 
 | ||||
|     // The image store is a visible side effect, needed so lmem_0 and the values derived from it are not optimized out. | ||||
|     // It should never execute (see the early return above), so whatever is bound to image unit 0 should not be clobbered. | ||||
|     imageStore(dest_image, ivec3(gl_GlobalInvocationID), color); | ||||
| } | ||||
|  | @ -222,6 +222,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
|     gpu.TickWork(); | ||||
| 
 | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     program_manager.LocalMemoryWarmup(); | ||||
|     pipeline->SetEngine(maxwell3d, gpu_memory); | ||||
|     pipeline->Configure(is_indexed); | ||||
| 
 | ||||
|  | @ -371,6 +372,7 @@ void RasterizerOpenGL::DispatchCompute() { | |||
|     if (!pipeline) { | ||||
|         return; | ||||
|     } | ||||
|     program_manager.LocalMemoryWarmup(); | ||||
|     pipeline->SetEngine(kepler_compute, gpu_memory); | ||||
|     pipeline->Configure(); | ||||
|     const auto& qmd{kepler_compute->launch_description}; | ||||
|  |  | |||
|  | @ -3,7 +3,9 @@ | |||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "video_core/host_shaders/opengl_lmem_warmup_comp.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_util.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
|  | @ -12,7 +14,8 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ | |||
|     GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||||
| }; | ||||
| 
 | ||||
| ProgramManager::ProgramManager(const Device& device) { | ||||
| ProgramManager::ProgramManager(const Device& device) | ||||
|     : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) { | ||||
|     glCreateProgramPipelines(1, &pipeline.handle); | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         glEnable(GL_COMPUTE_PROGRAM_NV); | ||||
|  | @ -98,6 +101,11 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU | |||
| 
 | ||||
| void ProgramManager::RestoreGuestCompute() {} | ||||
| 
 | ||||
| /// Binds the local-memory warmup compute program and dispatches a single 1x1x1 work group. | ||||
| /// Invoked from RasterizerOpenGL::PrepareDraw and RasterizerOpenGL::DispatchCompute before | ||||
| /// the guest pipeline is configured. | ||||
| void ProgramManager::LocalMemoryWarmup() { | ||||
|     // Without a valid program object there is nothing to dispatch; issuing glDispatchCompute | ||||
|     // with no active compute program would only raise a GL error on every draw/dispatch. | ||||
|     if (lmem_warmup_program.handle == 0) { | ||||
|         return; | ||||
|     } | ||||
|     BindComputeProgram(lmem_warmup_program.handle); | ||||
|     glDispatchCompute(1, 1, 1); | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::BindPipeline() { | ||||
|     if (!is_pipeline_bound) { | ||||
|         is_pipeline_bound = true; | ||||
|  |  | |||
|  | @ -30,6 +30,8 @@ public: | |||
| 
 | ||||
|     void RestoreGuestCompute(); | ||||
| 
 | ||||
|     void LocalMemoryWarmup(); | ||||
| 
 | ||||
| private: | ||||
|     void BindPipeline(); | ||||
| 
 | ||||
|  | @ -44,6 +46,7 @@ private: | |||
|     u32 current_stage_mask = 0; | ||||
|     std::array<GLuint, NUM_STAGES> current_programs{}; | ||||
|     GLuint current_assembly_compute_program = 0; | ||||
|     OGLProgram lmem_warmup_program; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ameerj
						ameerj