forked from eden-emu/eden
		
	Merge pull request #2358 from ReinUsesLisp/parallel-shader
gl_shader_cache: Use shared contexts to build shaders in parallel at boot
This commit is contained in:
		
						commit
						c539e53e55
					
				
					 9 changed files with 124 additions and 64 deletions
				
			
		|  | @ -98,9 +98,11 @@ struct FramebufferCacheKey { | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) | ||||
|     : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, | ||||
|       screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | ||||
|                                    ScreenInfo& info) | ||||
|     : res_cache{*this}, shader_cache{*this, system, emu_window, device}, | ||||
|       global_cache{*this}, system{system}, screen_info{info}, | ||||
|       buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||||
|     OpenGLState::ApplyDefaultState(); | ||||
| 
 | ||||
|     shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | ||||
|  |  | |||
|  | @ -48,7 +48,8 @@ struct FramebufferCacheKey; | |||
| 
 | ||||
| class RasterizerOpenGL : public VideoCore::RasterizerInterface { | ||||
| public: | ||||
|     explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); | ||||
|     explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | ||||
|                               ScreenInfo& info); | ||||
|     ~RasterizerOpenGL() override; | ||||
| 
 | ||||
|     void DrawArrays() override; | ||||
|  |  | |||
|  | @ -2,10 +2,14 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <mutex> | ||||
| #include <thread> | ||||
| #include <boost/functional/hash.hpp> | ||||
| #include "common/assert.h" | ||||
| #include "common/hash.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "core/core.h" | ||||
| #include "core/frontend/emu_window.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
|  | @ -344,8 +348,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, | |||
| } | ||||
| 
 | ||||
| ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | ||||
|                                      const Device& device) | ||||
|     : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} | ||||
|                                      Core::Frontend::EmuWindow& emu_window, const Device& device) | ||||
|     : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} | ||||
| 
 | ||||
| void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | ||||
|                                       const VideoCore::DiskResourceLoadCallback& callback) { | ||||
|  | @ -353,62 +357,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
|     if (!transferable) { | ||||
|         return; | ||||
|     } | ||||
|     const auto [raws, usages] = *transferable; | ||||
|     const auto [raws, shader_usages] = *transferable; | ||||
| 
 | ||||
|     auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); | ||||
| 
 | ||||
|     const auto supported_formats{GetSupportedFormats()}; | ||||
|     const auto unspecialized{ | ||||
|     const auto unspecialized_shaders{ | ||||
|         GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; | ||||
|     if (stop_loading) | ||||
|     if (stop_loading) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // Track if precompiled cache was altered during loading to know if we have to serialize the
 | ||||
|     // virtual precompiled cache file back to the hard drive
 | ||||
|     bool precompiled_cache_altered = false; | ||||
| 
 | ||||
|     // Build shaders
 | ||||
|     if (callback) | ||||
|         callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); | ||||
|     for (std::size_t i = 0; i < usages.size(); ++i) { | ||||
|         if (stop_loading) | ||||
|     // Inform the frontend about shader build initialization
 | ||||
|     if (callback) { | ||||
|         callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); | ||||
|     } | ||||
| 
 | ||||
|     std::mutex mutex; | ||||
|     std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
 | ||||
|     std::atomic_bool compilation_failed = false; | ||||
| 
 | ||||
|     const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||||
|                             std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, | ||||
|                             const ShaderDumpsMap& dumps) { | ||||
|         context->MakeCurrent(); | ||||
|         SCOPE_EXIT({ return context->DoneCurrent(); }); | ||||
| 
 | ||||
|         for (std::size_t i = begin; i < end; ++i) { | ||||
|             if (stop_loading || compilation_failed) { | ||||
|                 return; | ||||
|             } | ||||
|             const auto& usage{shader_usages[i]}; | ||||
|             LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", | ||||
|                      usage.unique_identifier, i, shader_usages.size()); | ||||
| 
 | ||||
|         const auto& usage{usages[i]}; | ||||
|         LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, | ||||
|                  i + 1, usages.size()); | ||||
| 
 | ||||
|         const auto& unspec{unspecialized.at(usage.unique_identifier)}; | ||||
|         const auto dump_it = dumps.find(usage); | ||||
|             const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | ||||
|             const auto dump{dumps.find(usage)}; | ||||
| 
 | ||||
|             CachedProgram shader; | ||||
|         if (dump_it != dumps.end()) { | ||||
|             if (dump != dumps.end()) { | ||||
|                 // If the shader is dumped, attempt to load it from the precompiled dump
 | ||||
|             shader = GeneratePrecompiledProgram(dump_it->second, supported_formats); | ||||
|                 shader = GeneratePrecompiledProgram(dump->second, supported_formats); | ||||
|                 if (!shader) { | ||||
|                     compilation_failed = true; | ||||
|                     return; | ||||
|                 } | ||||
|             } | ||||
|             if (!shader) { | ||||
|                 shader = SpecializeShader(unspecialized.code, unspecialized.entries, | ||||
|                                           unspecialized.program_type, usage.bindings, | ||||
|                                           usage.primitive, true); | ||||
|             } | ||||
| 
 | ||||
|             std::scoped_lock lock(mutex); | ||||
|             if (callback) { | ||||
|                 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | ||||
|                          shader_usages.size()); | ||||
|             } | ||||
| 
 | ||||
|             precompiled_programs.emplace(usage, std::move(shader)); | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; | ||||
|     const std::size_t bucket_size{shader_usages.size() / num_workers}; | ||||
|     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | ||||
|     std::vector<std::thread> threads(num_workers); | ||||
|     for (std::size_t i = 0; i < num_workers; ++i) { | ||||
|         const bool is_last_worker = i + 1 == num_workers; | ||||
|         const std::size_t start{bucket_size * i}; | ||||
|         const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; | ||||
| 
 | ||||
|         // On some platforms the shared context has to be created from the GUI thread
 | ||||
|         contexts[i] = emu_window.CreateSharedContext(); | ||||
|         threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); | ||||
|     } | ||||
|     for (auto& thread : threads) { | ||||
|         thread.join(); | ||||
|     } | ||||
| 
 | ||||
|     if (compilation_failed) { | ||||
|         // Invalidate the precompiled cache if a dumped shader was rejected
 | ||||
|         disk_cache.InvalidatePrecompiled(); | ||||
|                 precompiled_cache_altered = true; | ||||
|         dumps.clear(); | ||||
|         precompiled_cache_altered = true; | ||||
|         return; | ||||
|     } | ||||
|         } | ||||
|         if (!shader) { | ||||
|             shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, | ||||
|                                       usage.bindings, usage.primitive, true); | ||||
|         } | ||||
|         precompiled_programs.insert({usage, std::move(shader)}); | ||||
| 
 | ||||
|         if (callback) | ||||
|             callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size()); | ||||
|     if (stop_loading) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
 | ||||
|     // precompiling them
 | ||||
| 
 | ||||
|     for (std::size_t i = 0; i < usages.size(); ++i) { | ||||
|         const auto& usage{usages[i]}; | ||||
|     for (std::size_t i = 0; i < shader_usages.size(); ++i) { | ||||
|         const auto& usage{shader_usages[i]}; | ||||
|         if (dumps.find(usage) == dumps.end()) { | ||||
|             const auto& program = precompiled_programs.at(usage); | ||||
|             const auto& program{precompiled_programs.at(usage)}; | ||||
|             disk_cache.SaveDump(usage, program->handle); | ||||
|             precompiled_cache_altered = true; | ||||
|         } | ||||
|  |  | |||
|  | @ -22,7 +22,11 @@ | |||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| } // namespace Core
 | ||||
| } | ||||
| 
 | ||||
| namespace Core::Frontend { | ||||
| class EmuWindow; | ||||
| } | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
|  | @ -111,7 +115,7 @@ private: | |||
| class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | ||||
| public: | ||||
|     explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | ||||
|                                const Device& device); | ||||
|                                Core::Frontend::EmuWindow& emu_window, const Device& device); | ||||
| 
 | ||||
|     /// Loads disk cache for the current game
 | ||||
|     void LoadDiskCache(const std::atomic_bool& stop_loading, | ||||
|  | @ -133,13 +137,13 @@ private: | |||
|     CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||||
|                                              const std::set<GLenum>& supported_formats); | ||||
| 
 | ||||
|     Core::Frontend::EmuWindow& emu_window; | ||||
|     const Device& device; | ||||
| 
 | ||||
|     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||||
| 
 | ||||
|     ShaderDiskCacheOpenGL disk_cache; | ||||
| 
 | ||||
|     PrecompiledShaders precompiled_shaders; | ||||
|     PrecompiledPrograms precompiled_programs; | ||||
|     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
|     return {{raws, usages}}; | ||||
| } | ||||
| 
 | ||||
| std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | ||||
|           std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | ||||
| std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> | ||||
| ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||||
|     if (!IsUsable()) | ||||
|         return {}; | ||||
|  | @ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||
|     return *result; | ||||
| } | ||||
| 
 | ||||
| std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | ||||
|                         std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> | ||||
| std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> | ||||
| ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | ||||
|     // Read compressed file from disk and decompress to virtual precompiled cache file
 | ||||
|     std::vector<u8> compressed(file.GetSize()); | ||||
|  | @ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
|     } | ||||
| 
 | ||||
|     std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; | ||||
|     std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; | ||||
|     ShaderDumpsMap dumps; | ||||
|     while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | ||||
|         PrecompiledEntryKind kind{}; | ||||
|         if (!LoadObjectFromPrecompiled(kind)) { | ||||
|  |  | |||
|  | @ -33,6 +33,11 @@ namespace OpenGL { | |||
| using ProgramCode = std::vector<u64>; | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| struct ShaderDiskCacheUsage; | ||||
| struct ShaderDiskCacheDump; | ||||
| 
 | ||||
| using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||||
| 
 | ||||
| /// Allocated bindings used by an OpenGL shader program
 | ||||
| struct BaseBindings { | ||||
|     u32 cbuf{}; | ||||
|  |  | |||
|  | @ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons | |||
|     return matrix; | ||||
| } | ||||
| 
 | ||||
| RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system) | ||||
|     : VideoCore::RendererBase{window}, system{system} {} | ||||
| RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) | ||||
|     : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} | ||||
| 
 | ||||
| RendererOpenGL::~RendererOpenGL() = default; | ||||
| 
 | ||||
|  | @ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { | |||
|     } | ||||
|     // Initialize sRGB Usage
 | ||||
|     OpenGLState::ClearsRGBUsed(); | ||||
|     rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); | ||||
|     rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | ||||
| } | ||||
| 
 | ||||
| void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | ||||
|  |  | |||
|  | @ -45,7 +45,7 @@ struct ScreenInfo { | |||
| 
 | ||||
| class RendererOpenGL : public VideoCore::RendererBase { | ||||
| public: | ||||
|     explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system); | ||||
|     explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); | ||||
|     ~RendererOpenGL() override; | ||||
| 
 | ||||
|     /// Swap buffers (render frame)
 | ||||
|  | @ -77,6 +77,7 @@ private: | |||
|     void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, | ||||
|                                     const TextureInfo& texture); | ||||
| 
 | ||||
|     Core::Frontend::EmuWindow& emu_window; | ||||
|     Core::System& system; | ||||
| 
 | ||||
|     OpenGLState state; | ||||
|  |  | |||
|  | @ -91,25 +91,25 @@ void EmuThread::run() { | |||
| 
 | ||||
| class GGLContext : public Core::Frontend::GraphicsContext { | ||||
| public: | ||||
|     explicit GGLContext(QOpenGLContext* shared_context) | ||||
|         : context{std::make_unique<QOpenGLContext>(shared_context)} { | ||||
|         surface.setFormat(shared_context->format()); | ||||
|         surface.create(); | ||||
|     explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} { | ||||
|         context.setFormat(shared_context->format()); | ||||
|         context.setShareContext(shared_context); | ||||
|         context.create(); | ||||
|     } | ||||
| 
 | ||||
|     void MakeCurrent() override { | ||||
|         context->makeCurrent(&surface); | ||||
|         context.makeCurrent(shared_context->surface()); | ||||
|     } | ||||
| 
 | ||||
|     void DoneCurrent() override { | ||||
|         context->doneCurrent(); | ||||
|         context.doneCurrent(); | ||||
|     } | ||||
| 
 | ||||
|     void SwapBuffers() override {} | ||||
| 
 | ||||
| private: | ||||
|     std::unique_ptr<QOpenGLContext> context; | ||||
|     QOffscreenSurface surface; | ||||
|     QOpenGLContext* shared_context; | ||||
|     QOpenGLContext context; | ||||
| }; | ||||
| 
 | ||||
| // This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
 | ||||
|  | @ -358,7 +358,7 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) { | |||
| } | ||||
| 
 | ||||
| std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { | ||||
|     return std::make_unique<GGLContext>(shared_context.get()); | ||||
|     return std::make_unique<GGLContext>(context.get()); | ||||
| } | ||||
| 
 | ||||
| void GRenderWindow::InitRenderTarget() { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei