forked from eden-emu/eden
		
	Merge pull request #4359 from ReinUsesLisp/clamp-shared
renderer_{opengl,vulkan}: Clamp shared memory to host's limit
			
			
This commit is contained in:
		
						commit
						7278c59d70
					
				
					 6 changed files with 42 additions and 9 deletions
				
			
		|  | @ -913,12 +913,20 @@ void ARBDecompiler::DeclareCompute() { | ||||||
|     const ComputeInfo& info = registry.GetComputeInfo(); |     const ComputeInfo& info = registry.GetComputeInfo(); | ||||||
|     AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], |     AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||||||
|             info.workgroup_size[2]); |             info.workgroup_size[2]); | ||||||
|     if (info.shared_memory_size_in_words > 0) { |     if (info.shared_memory_size_in_words == 0) { | ||||||
|         const u32 size_in_bytes = info.shared_memory_size_in_words * 4; |         return; | ||||||
|  |     } | ||||||
|  |     const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||||||
|  |     u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||||||
|  |     if (size_in_bytes > limit) { | ||||||
|  |         LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||||||
|  |                   size_in_bytes, limit); | ||||||
|  |         size_in_bytes = limit; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     AddLine("SHARED_MEMORY {};", size_in_bytes); |     AddLine("SHARED_MEMORY {};", size_in_bytes); | ||||||
|     AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); |     AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||||||
| } | } | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| void ARBDecompiler::DeclareInputAttributes() { | void ARBDecompiler::DeclareInputAttributes() { | ||||||
|     if (stage == ShaderType::Compute) { |     if (stage == ShaderType::Compute) { | ||||||
|  |  | ||||||
|  | @ -212,6 +212,7 @@ Device::Device() | ||||||
|     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||||||
|     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | ||||||
|     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | ||||||
|  |     max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | ||||||
|     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | ||||||
|                           GLAD_GL_NV_shader_thread_shuffle; |                           GLAD_GL_NV_shader_thread_shuffle; | ||||||
|     has_shader_ballot = GLAD_GL_ARB_shader_ballot; |     has_shader_ballot = GLAD_GL_ARB_shader_ballot; | ||||||
|  | @ -250,6 +251,7 @@ Device::Device(std::nullptr_t) { | ||||||
|     shader_storage_alignment = 4; |     shader_storage_alignment = 4; | ||||||
|     max_vertex_attributes = 16; |     max_vertex_attributes = 16; | ||||||
|     max_varyings = 15; |     max_varyings = 15; | ||||||
|  |     max_compute_shared_memory_size = 0x10000; | ||||||
|     has_warp_intrinsics = true; |     has_warp_intrinsics = true; | ||||||
|     has_shader_ballot = true; |     has_shader_ballot = true; | ||||||
|     has_vertex_viewport_layer = true; |     has_vertex_viewport_layer = true; | ||||||
|  |  | ||||||
|  | @ -52,6 +52,10 @@ public: | ||||||
|         return max_varyings; |         return max_varyings; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     u32 GetMaxComputeSharedMemorySize() const { | ||||||
|  |         return max_compute_shared_memory_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     bool HasWarpIntrinsics() const { |     bool HasWarpIntrinsics() const { | ||||||
|         return has_warp_intrinsics; |         return has_warp_intrinsics; | ||||||
|     } |     } | ||||||
|  | @ -118,6 +122,7 @@ private: | ||||||
|     std::size_t shader_storage_alignment{}; |     std::size_t shader_storage_alignment{}; | ||||||
|     u32 max_vertex_attributes{}; |     u32 max_vertex_attributes{}; | ||||||
|     u32 max_varyings{}; |     u32 max_varyings{}; | ||||||
|  |     u32 max_compute_shared_memory_size{}; | ||||||
|     bool has_warp_intrinsics{}; |     bool has_warp_intrinsics{}; | ||||||
|     bool has_shader_ballot{}; |     bool has_shader_ballot{}; | ||||||
|     bool has_vertex_viewport_layer{}; |     bool has_vertex_viewport_layer{}; | ||||||
|  |  | ||||||
|  | @ -602,8 +602,15 @@ private: | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         const auto& info = registry.GetComputeInfo(); |         const auto& info = registry.GetComputeInfo(); | ||||||
|         if (const u32 size = info.shared_memory_size_in_words; size > 0) { |         if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { | ||||||
|             code.AddLine("shared uint smem[{}];", size); |             const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||||||
|  |             if (size > limit) { | ||||||
|  |                 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||||||
|  |                           size, limit); | ||||||
|  |                 size = limit; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             code.AddLine("shared uint smem[{}];", size / 4); | ||||||
|             code.AddNewLine(); |             code.AddNewLine(); | ||||||
|         } |         } | ||||||
|         code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", |         code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||||||
|  |  | ||||||
|  | @ -122,6 +122,11 @@ public: | ||||||
|         return properties.limits.maxPushConstantsSize; |         return properties.limits.maxPushConstantsSize; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /// Returns the maximum size for shared memory.
 | ||||||
|  |     u32 GetMaxComputeSharedMemorySize() const { | ||||||
|  |         return properties.limits.maxComputeSharedMemorySize; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Returns true if ASTC is natively supported.
 |     /// Returns true if ASTC is natively supported.
 | ||||||
|     bool IsOptimalAstcSupported() const { |     bool IsOptimalAstcSupported() const { | ||||||
|         return is_optimal_astc_supported; |         return is_optimal_astc_supported; | ||||||
|  |  | ||||||
|  | @ -685,13 +685,19 @@ private: | ||||||
|         } |         } | ||||||
|         t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); |         t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); | ||||||
| 
 | 
 | ||||||
|         const u32 smem_size = specialization.shared_memory_size; |         u32 smem_size = specialization.shared_memory_size * 4; | ||||||
|         if (smem_size == 0) { |         if (smem_size == 0) { | ||||||
|             // Avoid declaring an empty array.
 |             // Avoid declaring an empty array.
 | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); |         const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||||||
|         const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); |         if (smem_size > limit) { | ||||||
|  |             LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", | ||||||
|  |                       smem_size, limit); | ||||||
|  |             smem_size = limit; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); | ||||||
|         const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); |         const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); | ||||||
|         Name(type_pointer, "SharedMemory"); |         Name(type_pointer, "SharedMemory"); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Rodrigo Locatti
						Rodrigo Locatti