forked from eden-emu/eden
		
	Merge pull request #3815 from FernandoS27/command-list-2
GPU: More optimizations to GPU Command List Processing and DMA Copy Optimizations
This commit is contained in:
		
						commit
						41682e0888
					
				
					 18 changed files with 198 additions and 66 deletions
				
			
		|  | @ -8,6 +8,7 @@ add_library(video_core STATIC | ||||||
|     dma_pusher.h |     dma_pusher.h | ||||||
|     engines/const_buffer_engine_interface.h |     engines/const_buffer_engine_interface.h | ||||||
|     engines/const_buffer_info.h |     engines/const_buffer_info.h | ||||||
|  |     engines/engine_interface.h | ||||||
|     engines/engine_upload.cpp |     engines/engine_upload.cpp | ||||||
|     engines/engine_upload.h |     engines/engine_upload.h | ||||||
|     engines/fermi_2d.cpp |     engines/fermi_2d.cpp | ||||||
|  |  | ||||||
|  | @ -27,6 +27,8 @@ void DmaPusher::DispatchCalls() { | ||||||
| 
 | 
 | ||||||
|     dma_pushbuffer_subindex = 0; |     dma_pushbuffer_subindex = 0; | ||||||
| 
 | 
 | ||||||
|  |     dma_state.is_last_call = true; | ||||||
|  | 
 | ||||||
|     while (system.IsPoweredOn()) { |     while (system.IsPoweredOn()) { | ||||||
|         if (!Step()) { |         if (!Step()) { | ||||||
|             break; |             break; | ||||||
|  | @ -82,9 +84,11 @@ bool DmaPusher::Step() { | ||||||
|                     index); |                     index); | ||||||
|                 CallMultiMethod(&command_header.argument, max_write); |                 CallMultiMethod(&command_header.argument, max_write); | ||||||
|                 dma_state.method_count -= max_write; |                 dma_state.method_count -= max_write; | ||||||
|  |                 dma_state.is_last_call = true; | ||||||
|                 index += max_write; |                 index += max_write; | ||||||
|                 continue; |                 continue; | ||||||
|             } else { |             } else { | ||||||
|  |                 dma_state.is_last_call = dma_state.method_count <= 1; | ||||||
|                 CallMethod(command_header.argument); |                 CallMethod(command_header.argument); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|  | @ -144,12 +148,22 @@ void DmaPusher::SetState(const CommandHeader& command_header) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void DmaPusher::CallMethod(u32 argument) const { | void DmaPusher::CallMethod(u32 argument) const { | ||||||
|  |     if (dma_state.method < non_puller_methods) { | ||||||
|         gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); |         gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); | ||||||
|  |     } else { | ||||||
|  |         subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, | ||||||
|  |                                                       dma_state.is_last_call); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | ||||||
|  |     if (dma_state.method < non_puller_methods) { | ||||||
|         gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, |         gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, | ||||||
|                             dma_state.method_count); |                             dma_state.method_count); | ||||||
|  |     } else { | ||||||
|  |         subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, | ||||||
|  |                                                            num_methods, dma_state.method_count); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -4,11 +4,13 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <array> | ||||||
| #include <vector> | #include <vector> | ||||||
| #include <queue> | #include <queue> | ||||||
| 
 | 
 | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
|  | @ -69,7 +71,13 @@ public: | ||||||
| 
 | 
 | ||||||
|     void DispatchCalls(); |     void DispatchCalls(); | ||||||
| 
 | 
 | ||||||
|  |     void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) { | ||||||
|  |         subchannels[subchannel_id] = engine; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|  |     static constexpr u32 non_puller_methods = 0x40; | ||||||
|  |     static constexpr u32 max_subchannels = 8; | ||||||
|     bool Step(); |     bool Step(); | ||||||
| 
 | 
 | ||||||
|     void SetState(const CommandHeader& command_header); |     void SetState(const CommandHeader& command_header); | ||||||
|  | @ -88,6 +96,7 @@ private: | ||||||
|         u32 method_count;      ///< Current method count
 |         u32 method_count;      ///< Current method count
 | ||||||
|         u32 length_pending;    ///< Large NI command length pending
 |         u32 length_pending;    ///< Large NI command length pending
 | ||||||
|         bool non_incrementing; ///< Current command's NI flag
 |         bool non_incrementing; ///< Current command's NI flag
 | ||||||
|  |         bool is_last_call; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     DmaState dma_state{}; |     DmaState dma_state{}; | ||||||
|  | @ -96,6 +105,8 @@ private: | ||||||
|     GPUVAddr dma_mget{};  ///< main pushbuffer last read address
 |     GPUVAddr dma_mget{};  ///< main pushbuffer last read address
 | ||||||
|     bool ib_enable{true}; ///< IB mode enabled
 |     bool ib_enable{true}; ///< IB mode enabled
 | ||||||
| 
 | 
 | ||||||
|  |     std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; | ||||||
|  | 
 | ||||||
|     GPU& gpu; |     GPU& gpu; | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
| }; | }; | ||||||
|  |  | ||||||
							
								
								
									
										22
									
								
								src/video_core/engines/engine_interface.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/video_core/engines/engine_interface.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,22 @@ | ||||||
|  | // Copyright 2020 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <type_traits> | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace Tegra::Engines { | ||||||
|  | 
 | ||||||
|  | class EngineInterface { | ||||||
|  | public: | ||||||
|  |     /// Write the value to the register identified by method.
 | ||||||
|  |     virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0; | ||||||
|  | 
 | ||||||
|  |     /// Write multiple values to the register identified by method.
 | ||||||
|  |     virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                                  u32 methods_pending) = 0; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace Tegra::Engines
 | ||||||
|  | @ -12,13 +12,13 @@ namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
| Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | ||||||
| 
 | 
 | ||||||
| void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     ASSERT_MSG(method_call.method < Regs::NUM_REGS, |     ASSERT_MSG(method < Regs::NUM_REGS, | ||||||
|                "Invalid Fermi2D register, increase the size of the Regs structure"); |                "Invalid Fermi2D register, increase the size of the Regs structure"); | ||||||
| 
 | 
 | ||||||
|     regs.reg_array[method_call.method] = method_call.argument; |     regs.reg_array[method] = method_argument; | ||||||
| 
 | 
 | ||||||
|     switch (method_call.method) { |     switch (method) { | ||||||
|     // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
 |     // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
 | ||||||
|     // so trigger on the second 32-bit write.
 |     // so trigger on the second 32-bit write.
 | ||||||
|     case FERMI2D_REG_INDEX(blit_src_y) + 1: { |     case FERMI2D_REG_INDEX(blit_src_y) + 1: { | ||||||
|  | @ -30,7 +30,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { | ||||||
| 
 | 
 | ||||||
| void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { | void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { | ||||||
|     for (std::size_t i = 0; i < amount; i++) { |     for (std::size_t i = 0; i < amount; i++) { | ||||||
|         CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); |         CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/math_util.h" | #include "common/math_util.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -31,16 +32,17 @@ namespace Tegra::Engines { | ||||||
| #define FERMI2D_REG_INDEX(field_name)                                                              \ | #define FERMI2D_REG_INDEX(field_name)                                                              \ | ||||||
|     (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class Fermi2D final { | class Fermi2D final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); |     explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); | ||||||
|     ~Fermi2D() = default; |     ~Fermi2D() = default; | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethod(const GPU::MethodCall& method_call); |     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||||
| 
 | 
 | ||||||
|     /// Write multiple values to the register identified by method.
 |     /// Write multiple values to the register identified by method.
 | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     enum class Origin : u32 { |     enum class Origin : u32 { | ||||||
|         Center = 0, |         Center = 0, | ||||||
|  |  | ||||||
|  | @ -24,20 +24,19 @@ KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterfac | ||||||
| 
 | 
 | ||||||
| KeplerCompute::~KeplerCompute() = default; | KeplerCompute::~KeplerCompute() = default; | ||||||
| 
 | 
 | ||||||
| void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     ASSERT_MSG(method_call.method < Regs::NUM_REGS, |     ASSERT_MSG(method < Regs::NUM_REGS, | ||||||
|                "Invalid KeplerCompute register, increase the size of the Regs structure"); |                "Invalid KeplerCompute register, increase the size of the Regs structure"); | ||||||
| 
 | 
 | ||||||
|     regs.reg_array[method_call.method] = method_call.argument; |     regs.reg_array[method] = method_argument; | ||||||
| 
 | 
 | ||||||
|     switch (method_call.method) { |     switch (method) { | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(exec_upload): { |     case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | ||||||
|         upload_state.ProcessExec(regs.exec_upload.linear != 0); |         upload_state.ProcessExec(regs.exec_upload.linear != 0); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case KEPLER_COMPUTE_REG_INDEX(data_upload): { |     case KEPLER_COMPUTE_REG_INDEX(data_upload): { | ||||||
|         const bool is_last_call = method_call.IsLastCall(); |         upload_state.ProcessData(method_argument, is_last_call); | ||||||
|         upload_state.ProcessData(method_call.argument, is_last_call); |  | ||||||
|         if (is_last_call) { |         if (is_last_call) { | ||||||
|             system.GPU().Maxwell3D().OnMemoryWrite(); |             system.GPU().Maxwell3D().OnMemoryWrite(); | ||||||
|         } |         } | ||||||
|  | @ -54,7 +53,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | ||||||
| void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                                     u32 methods_pending) { |                                     u32 methods_pending) { | ||||||
|     for (std::size_t i = 0; i < amount; i++) { |     for (std::size_t i = 0; i < amount; i++) { | ||||||
|         CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); |         CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | @ -39,7 +40,7 @@ namespace Tegra::Engines { | ||||||
| #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | ||||||
|     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class KeplerCompute final : public ConstBufferEngineInterface { | class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |     explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|                            MemoryManager& memory_manager); |                            MemoryManager& memory_manager); | ||||||
|  | @ -200,10 +201,11 @@ public: | ||||||
|                   "KeplerCompute LaunchParams has wrong size"); |                   "KeplerCompute LaunchParams has wrong size"); | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethod(const GPU::MethodCall& method_call); |     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||||
| 
 | 
 | ||||||
|     /// Write multiple values to the register identified by method.
 |     /// Write multiple values to the register identified by method.
 | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     Texture::FullTextureInfo GetTexture(std::size_t offset) const; |     Texture::FullTextureInfo GetTexture(std::size_t offset) const; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,20 +19,19 @@ KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | ||||||
| 
 | 
 | ||||||
| KeplerMemory::~KeplerMemory() = default; | KeplerMemory::~KeplerMemory() = default; | ||||||
| 
 | 
 | ||||||
| void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     ASSERT_MSG(method_call.method < Regs::NUM_REGS, |     ASSERT_MSG(method < Regs::NUM_REGS, | ||||||
|                "Invalid KeplerMemory register, increase the size of the Regs structure"); |                "Invalid KeplerMemory register, increase the size of the Regs structure"); | ||||||
| 
 | 
 | ||||||
|     regs.reg_array[method_call.method] = method_call.argument; |     regs.reg_array[method] = method_argument; | ||||||
| 
 | 
 | ||||||
|     switch (method_call.method) { |     switch (method) { | ||||||
|     case KEPLERMEMORY_REG_INDEX(exec): { |     case KEPLERMEMORY_REG_INDEX(exec): { | ||||||
|         upload_state.ProcessExec(regs.exec.linear != 0); |         upload_state.ProcessExec(regs.exec.linear != 0); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case KEPLERMEMORY_REG_INDEX(data): { |     case KEPLERMEMORY_REG_INDEX(data): { | ||||||
|         const bool is_last_call = method_call.IsLastCall(); |         upload_state.ProcessData(method_argument, is_last_call); | ||||||
|         upload_state.ProcessData(method_call.argument, is_last_call); |  | ||||||
|         if (is_last_call) { |         if (is_last_call) { | ||||||
|             system.GPU().Maxwell3D().OnMemoryWrite(); |             system.GPU().Maxwell3D().OnMemoryWrite(); | ||||||
|         } |         } | ||||||
|  | @ -44,7 +43,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | ||||||
| void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                                    u32 methods_pending) { |                                    u32 methods_pending) { | ||||||
|     for (std::size_t i = 0; i < amount; i++) { |     for (std::size_t i = 0; i < amount; i++) { | ||||||
|         CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); |         CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| 
 | 
 | ||||||
|  | @ -32,16 +33,17 @@ namespace Tegra::Engines { | ||||||
| #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \ | #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \ | ||||||
|     (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class KeplerMemory final { | class KeplerMemory final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     KeplerMemory(Core::System& system, MemoryManager& memory_manager); |     KeplerMemory(Core::System& system, MemoryManager& memory_manager); | ||||||
|     ~KeplerMemory(); |     ~KeplerMemory(); | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethod(const GPU::MethodCall& method_call); |     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||||
| 
 | 
 | ||||||
|     /// Write multiple values to the register identified by method.
 |     /// Write multiple values to the register identified by method.
 | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     struct Regs { |     struct Regs { | ||||||
|         static constexpr size_t NUM_REGS = 0x7F; |         static constexpr size_t NUM_REGS = 0x7F; | ||||||
|  |  | ||||||
|  | @ -125,12 +125,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     const u32 method = method_call.method; |  | ||||||
| 
 |  | ||||||
|     if (method == cb_data_state.current) { |     if (method == cb_data_state.current) { | ||||||
|         regs.reg_array[method] = method_call.argument; |         regs.reg_array[method] = method_argument; | ||||||
|         ProcessCBData(method_call.argument); |         ProcessCBData(method_argument); | ||||||
|         return; |         return; | ||||||
|     } else if (cb_data_state.current != null_cb_data) { |     } else if (cb_data_state.current != null_cb_data) { | ||||||
|         FinishCBData(); |         FinishCBData(); | ||||||
|  | @ -153,10 +151,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | ||||||
|             executing_macro = method; |             executing_macro = method; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         macro_params.push_back(method_call.argument); |         macro_params.push_back(method_argument); | ||||||
| 
 | 
 | ||||||
|         // Call the macro when there are no more parameters in the command buffer
 |         // Call the macro when there are no more parameters in the command buffer
 | ||||||
|         if (method_call.IsLastCall()) { |         if (is_last_call) { | ||||||
|             CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); |             CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); | ||||||
|             macro_params.clear(); |             macro_params.clear(); | ||||||
|         } |         } | ||||||
|  | @ -166,7 +164,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | ||||||
|     ASSERT_MSG(method < Regs::NUM_REGS, |     ASSERT_MSG(method < Regs::NUM_REGS, | ||||||
|                "Invalid Maxwell3D register, increase the size of the Regs structure"); |                "Invalid Maxwell3D register, increase the size of the Regs structure"); | ||||||
| 
 | 
 | ||||||
|     u32 arg = method_call.argument; |     u32 arg = method_argument; | ||||||
|     // Keep track of the register value in shadow_state when requested.
 |     // Keep track of the register value in shadow_state when requested.
 | ||||||
|     if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || |     if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || | ||||||
|         shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { |         shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { | ||||||
|  | @ -189,7 +187,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case MAXWELL3D_REG_INDEX(shadow_ram_control): { |     case MAXWELL3D_REG_INDEX(shadow_ram_control): { | ||||||
|         shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); |         shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case MAXWELL3D_REG_INDEX(macros.data): { |     case MAXWELL3D_REG_INDEX(macros.data): { | ||||||
|  | @ -272,7 +270,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case MAXWELL3D_REG_INDEX(data_upload): { |     case MAXWELL3D_REG_INDEX(data_upload): { | ||||||
|         const bool is_last_call = method_call.IsLastCall(); |  | ||||||
|         upload_state.ProcessData(arg, is_last_call); |         upload_state.ProcessData(arg, is_last_call); | ||||||
|         if (is_last_call) { |         if (is_last_call) { | ||||||
|             OnMemoryWrite(); |             OnMemoryWrite(); | ||||||
|  | @ -330,7 +327,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|     } |     } | ||||||
|     default: { |     default: { | ||||||
|         for (std::size_t i = 0; i < amount; i++) { |         for (std::size_t i = 0; i < amount; i++) { | ||||||
|             CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); |             CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     } |     } | ||||||
|  | @ -360,16 +357,15 @@ void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { | ||||||
|     StepInstance(expected_mode, count); |     StepInstance(expected_mode, count); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { | ||||||
|     const u32 method = method_call.method; |  | ||||||
|     if (mme_inline[method]) { |     if (mme_inline[method]) { | ||||||
|         regs.reg_array[method] = method_call.argument; |         regs.reg_array[method] = method_argument; | ||||||
|         if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || |         if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || | ||||||
|             method == MAXWELL3D_REG_INDEX(index_array.count)) { |             method == MAXWELL3D_REG_INDEX(index_array.count)) { | ||||||
|             const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) |             const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) | ||||||
|                                                   ? MMEDrawMode::Array |                                                   ? MMEDrawMode::Array | ||||||
|                                                   : MMEDrawMode::Indexed; |                                                   : MMEDrawMode::Indexed; | ||||||
|             StepInstance(expected_mode, method_call.argument); |             StepInstance(expected_mode, method_argument); | ||||||
|         } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { |         } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { | ||||||
|             mme_draw.instance_mode = |             mme_draw.instance_mode = | ||||||
|                 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); |                 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); | ||||||
|  | @ -381,7 +377,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | ||||||
|         if (mme_draw.current_mode != MMEDrawMode::Undefined) { |         if (mme_draw.current_mode != MMEDrawMode::Undefined) { | ||||||
|             FlushMMEInlineDraw(); |             FlushMMEInlineDraw(); | ||||||
|         } |         } | ||||||
|         CallMethod(method_call); |         CallMethod(method, method_argument, true); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,6 +19,7 @@ | ||||||
| #include "common/math_util.h" | #include "common/math_util.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
| #include "video_core/engines/const_buffer_info.h" | #include "video_core/engines/const_buffer_info.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | @ -48,7 +49,7 @@ namespace Tegra::Engines { | ||||||
| #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | ||||||
|     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
| class Maxwell3D final : public ConstBufferEngineInterface { | class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |     explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|                        MemoryManager& memory_manager); |                        MemoryManager& memory_manager); | ||||||
|  | @ -1360,13 +1361,14 @@ public: | ||||||
|     u32 GetRegisterValue(u32 method) const; |     u32 GetRegisterValue(u32 method) const; | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethod(const GPU::MethodCall& method_call); |     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||||
| 
 | 
 | ||||||
|     /// Write multiple values to the register identified by method.
 |     /// Write multiple values to the register identified by method.
 | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethodFromMME(const GPU::MethodCall& method_call); |     void CallMethodFromMME(u32 method, u32 method_argument); | ||||||
| 
 | 
 | ||||||
|     void FlushMMEInlineDraw(); |     void FlushMMEInlineDraw(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -17,16 +17,16 @@ namespace Tegra::Engines { | ||||||
| MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) | ||||||
|     : system{system}, memory_manager{memory_manager} {} |     : system{system}, memory_manager{memory_manager} {} | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     ASSERT_MSG(method_call.method < Regs::NUM_REGS, |     ASSERT_MSG(method < Regs::NUM_REGS, | ||||||
|                "Invalid MaxwellDMA register, increase the size of the Regs structure"); |                "Invalid MaxwellDMA register, increase the size of the Regs structure"); | ||||||
| 
 | 
 | ||||||
|     regs.reg_array[method_call.method] = method_call.argument; |     regs.reg_array[method] = method_argument; | ||||||
| 
 | 
 | ||||||
| #define MAXWELLDMA_REG_INDEX(field_name)                                                           \ | #define MAXWELLDMA_REG_INDEX(field_name)                                                           \ | ||||||
|     (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) | ||||||
| 
 | 
 | ||||||
|     switch (method_call.method) { |     switch (method) { | ||||||
|     case MAXWELLDMA_REG_INDEX(exec): { |     case MAXWELLDMA_REG_INDEX(exec): { | ||||||
|         HandleCopy(); |         HandleCopy(); | ||||||
|         break; |         break; | ||||||
|  | @ -39,7 +39,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { | ||||||
| void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                                  u32 methods_pending) { |                                  u32 methods_pending) { | ||||||
|     for (std::size_t i = 0; i < amount; i++) { |     for (std::size_t i = 0; i < amount; i++) { | ||||||
|         CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); |         CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() { | ||||||
|     ASSERT(regs.exec.enable_2d == 1); |     ASSERT(regs.exec.enable_2d == 1); | ||||||
| 
 | 
 | ||||||
|     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | ||||||
|  | 
 | ||||||
|         ASSERT(regs.src_params.BlockDepth() == 0); |         ASSERT(regs.src_params.BlockDepth() == 0); | ||||||
|  |         // Optimized path for micro copies.
 | ||||||
|  |         if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { | ||||||
|  |             const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||||||
|  |             const std::size_t src_size = Texture::GetGOBSize(); | ||||||
|  |             const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||||||
|  |             u32 pos_x = regs.src_params.pos_x; | ||||||
|  |             u32 pos_y = regs.src_params.pos_y; | ||||||
|  |             const u64 offset = | ||||||
|  |                 Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, | ||||||
|  |                                       regs.src_params.BlockDepth(), bytes_per_pixel); | ||||||
|  |             const u32 x_in_gob = 64 / bytes_per_pixel; | ||||||
|  |             pos_x = pos_x % x_in_gob; | ||||||
|  |             pos_y = pos_y % 8; | ||||||
|  | 
 | ||||||
|  |             if (read_buffer.size() < src_size) { | ||||||
|  |                 read_buffer.resize(src_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (write_buffer.size() < dst_size) { | ||||||
|  |                 write_buffer.resize(dst_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (Settings::IsGPULevelExtreme()) { | ||||||
|  |                 memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); | ||||||
|  |                 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||||||
|  |             } else { | ||||||
|  |                 memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); | ||||||
|  |                 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||||||
|  |                                       regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), | ||||||
|  |                                       write_buffer.data(), regs.src_params.BlockHeight(), pos_x, | ||||||
|  |                                       pos_y); | ||||||
|  | 
 | ||||||
|  |             memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||||||
|  | 
 | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 |         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 | ||||||
|         const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |         const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||||||
|         const std::size_t src_size = Texture::CalculateSize( |         const std::size_t src_size = Texture::CalculateSize( | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/engine_interface.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
|  | @ -27,16 +28,17 @@ namespace Tegra::Engines { | ||||||
|  * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
 |  * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
 | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| class MaxwellDMA final { | class MaxwellDMA final : public EngineInterface { | ||||||
| public: | public: | ||||||
|     explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |     explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); | ||||||
|     ~MaxwellDMA() = default; |     ~MaxwellDMA() = default; | ||||||
| 
 | 
 | ||||||
|     /// Write the value to the register identified by method.
 |     /// Write the value to the register identified by method.
 | ||||||
|     void CallMethod(const GPU::MethodCall& method_call); |     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||||
| 
 | 
 | ||||||
|     /// Write multiple values to the register identified by method.
 |     /// Write multiple values to the register identified by method.
 | ||||||
|     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); |     void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|  |                          u32 methods_pending) override; | ||||||
| 
 | 
 | ||||||
|     struct Regs { |     struct Regs { | ||||||
|         static constexpr std::size_t NUM_REGS = 0x1D6; |         static constexpr std::size_t NUM_REGS = 0x1D6; | ||||||
|  |  | ||||||
|  | @ -299,19 +299,21 @@ void GPU::CallEngineMethod(const MethodCall& method_call) { | ||||||
| 
 | 
 | ||||||
|     switch (engine) { |     switch (engine) { | ||||||
|     case EngineID::FERMI_TWOD_A: |     case EngineID::FERMI_TWOD_A: | ||||||
|         fermi_2d->CallMethod(method_call); |         fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||||||
|         break; |         break; | ||||||
|     case EngineID::MAXWELL_B: |     case EngineID::MAXWELL_B: | ||||||
|         maxwell_3d->CallMethod(method_call); |         maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||||||
|         break; |         break; | ||||||
|     case EngineID::KEPLER_COMPUTE_B: |     case EngineID::KEPLER_COMPUTE_B: | ||||||
|         kepler_compute->CallMethod(method_call); |         kepler_compute->CallMethod(method_call.method, method_call.argument, | ||||||
|  |                                    method_call.IsLastCall()); | ||||||
|         break; |         break; | ||||||
|     case EngineID::MAXWELL_DMA_COPY_A: |     case EngineID::MAXWELL_DMA_COPY_A: | ||||||
|         maxwell_dma->CallMethod(method_call); |         maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||||||
|         break; |         break; | ||||||
|     case EngineID::KEPLER_INLINE_TO_MEMORY_B: |     case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||||||
|         kepler_memory->CallMethod(method_call); |         kepler_memory->CallMethod(method_call.method, method_call.argument, | ||||||
|  |                                   method_call.IsLastCall()); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented engine"); |         UNIMPLEMENTED_MSG("Unimplemented engine"); | ||||||
|  | @ -347,7 +349,27 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||||||
|     // Bind the current subchannel to the desired engine id.
 |     // Bind the current subchannel to the desired engine id.
 | ||||||
|     LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, |     LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||||||
|               method_call.argument); |               method_call.argument); | ||||||
|     bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); |     const auto engine_id = static_cast<EngineID>(method_call.argument); | ||||||
|  |     bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id); | ||||||
|  |     switch (engine_id) { | ||||||
|  |     case EngineID::FERMI_TWOD_A: | ||||||
|  |         dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||||||
|  |         break; | ||||||
|  |     case EngineID::MAXWELL_B: | ||||||
|  |         dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||||||
|  |         break; | ||||||
|  |     case EngineID::KEPLER_COMPUTE_B: | ||||||
|  |         dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||||||
|  |         break; | ||||||
|  |     case EngineID::MAXWELL_DMA_COPY_A: | ||||||
|  |         dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||||||
|  |         break; | ||||||
|  |     case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||||||
|  |         dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::ProcessSemaphoreTriggerMethod() { | void GPU::ProcessSemaphoreTriggerMethod() { | ||||||
|  |  | ||||||
|  | @ -328,7 +328,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MacroInterpreter::Send(u32 value) { | void MacroInterpreter::Send(u32 value) { | ||||||
|     maxwell3d.CallMethodFromMME({method_address.address, value}); |     maxwell3d.CallMethodFromMME(method_address.address, value); | ||||||
|     // Increment the method address by the method increment.
 |     // Increment the method address by the method increment.
 | ||||||
|     method_address.address.Assign(method_address.address.Value() + |     method_address.address.Assign(method_address.address.Value() + | ||||||
|                                   method_address.increment.Value()); |                                   method_address.increment.Value()); | ||||||
|  |  | ||||||
|  | @ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                  u32 bytes_per_pixel) { | ||||||
|  |     auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||||||
|  |     const u32 gobs_in_block = 1 << block_height; | ||||||
|  |     const u32 y_blocks = gob_size_y << block_height; | ||||||
|  |     const u32 x_per_gob = gob_size_x / bytes_per_pixel; | ||||||
|  |     const u32 x_blocks = div_ceil(width, x_per_gob); | ||||||
|  |     const u32 block_size = gob_size * gobs_in_block; | ||||||
|  |     const u32 stride = block_size * x_blocks; | ||||||
|  |     const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | ||||||
|  |     const u32 relative_y = dst_y % y_blocks; | ||||||
|  |     return base + (relative_y / gob_size_y) * gob_size; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  |  | ||||||
|  | @ -59,4 +59,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | ||||||
| void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|                    std::size_t copy_size, const u8* source_data, u8* swizzle_data); |                    std::size_t copy_size, const u8* source_data, u8* swizzle_data); | ||||||
| 
 | 
 | ||||||
|  | /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y'
 | ||||||
|  | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                  u32 bytes_per_pixel); | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei