forked from eden-emu/eden
		
	MacroHLE: Add HLE replacement for base vertex and base instance.
This commit is contained in:
		
							parent
							
								
									209ce46a9f
								
							
						
					
					
						commit
						7842543573
					
				
					 22 changed files with 265 additions and 70 deletions
				
			
		|  | @ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | ||||||
|             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; |             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||||||
|             return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); |             return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||||||
|         } |         } | ||||||
|  |     case IR::Attribute::BaseInstance: | ||||||
|  |         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); | ||||||
|  |     case IR::Attribute::BaseVertex: | ||||||
|  |         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); | ||||||
|     case IR::Attribute::FrontFace: |     case IR::Attribute::FrontFace: | ||||||
|         return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), |         return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | ||||||
|                             ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), |                             ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), | ||||||
|  | @ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { | ||||||
|             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; |             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||||||
|             return ctx.OpISub(ctx.U32[1], index, base); |             return ctx.OpISub(ctx.U32[1], index, base); | ||||||
|         } |         } | ||||||
|  |     case IR::Attribute::BaseInstance: | ||||||
|  |         return ctx.OpLoad(ctx.U32[1], ctx.base_instance); | ||||||
|  |     case IR::Attribute::BaseVertex: | ||||||
|  |         return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); | ||||||
|     default: |     default: | ||||||
|         throw NotImplementedException("Read U32 attribute {}", attr); |         throw NotImplementedException("Read U32 attribute {}", attr); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) { | ||||||
|     if (loads[IR::Attribute::InstanceId]) { |     if (loads[IR::Attribute::InstanceId]) { | ||||||
|         if (profile.support_vertex_instance_id) { |         if (profile.support_vertex_instance_id) { | ||||||
|             instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); |             instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); | ||||||
|  |             if (loads[IR::Attribute::BaseInstance]) { | ||||||
|  |                 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||||
|  |             } | ||||||
|         } else { |         } else { | ||||||
|             instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); |             instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); | ||||||
|             base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); |             base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||||||
|         } |         } | ||||||
|  |     } else if (loads[IR::Attribute::BaseInstance]) { | ||||||
|  |         base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||||||
|     } |     } | ||||||
|     if (loads[IR::Attribute::VertexId]) { |     if (loads[IR::Attribute::VertexId]) { | ||||||
|         if (profile.support_vertex_instance_id) { |         if (profile.support_vertex_instance_id) { | ||||||
|             vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); |             vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); | ||||||
|  |             if (loads[IR::Attribute::BaseVertex]) { | ||||||
|  |                 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||||
|  |             } | ||||||
|         } else { |         } else { | ||||||
|             vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); |             vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); | ||||||
|             base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); |             base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||||
|         } |         } | ||||||
|  |     } else if (loads[IR::Attribute::BaseVertex]) { | ||||||
|  |         base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||||
|     } |     } | ||||||
|     if (loads[IR::Attribute::FrontFace]) { |     if (loads[IR::Attribute::FrontFace]) { | ||||||
|         front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); |         front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); | ||||||
|  |  | ||||||
|  | @ -34,6 +34,11 @@ public: | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; |     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] virtual bool HasHLEMacroState() const = 0; | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer( | ||||||
|  |         u32 bank, u32 offset) = 0; | ||||||
|  | 
 | ||||||
|     virtual void Dump(u64 hash) = 0; |     virtual void Dump(u64 hash) = 0; | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] const ProgramHeader& SPH() const noexcept { |     [[nodiscard]] const ProgramHeader& SPH() const noexcept { | ||||||
|  |  | ||||||
|  | @ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) { | ||||||
|         return "ViewportMask"; |         return "ViewportMask"; | ||||||
|     case Attribute::FrontFace: |     case Attribute::FrontFace: | ||||||
|         return "FrontFace"; |         return "FrontFace"; | ||||||
|  |     case Attribute::BaseInstance: | ||||||
|  |         return "BaseInstance"; | ||||||
|  |     case Attribute::BaseVertex: | ||||||
|  |         return "BaseVertex"; | ||||||
|     } |     } | ||||||
|     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); |     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -219,6 +219,10 @@ enum class Attribute : u64 { | ||||||
|     FixedFncTexture9Q = 231, |     FixedFncTexture9Q = 231, | ||||||
|     ViewportMask = 232, |     ViewportMask = 232, | ||||||
|     FrontFace = 255, |     FrontFace = 255, | ||||||
|  | 
 | ||||||
|  |     // Implementation attributes
 | ||||||
|  |     BaseInstance = 256, | ||||||
|  |     BaseVertex = 257, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| constexpr size_t NUM_GENERICS = 32; | constexpr size_t NUM_GENERICS = 32; | ||||||
|  |  | ||||||
|  | @ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | ||||||
|     return Inst<F32>(Opcode::GetAttribute, attribute, vertex); |     return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { | ||||||
|  |     return GetAttributeU32(attribute, Imm32(0)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { | ||||||
|  |     return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | ||||||
|     Inst(Opcode::SetAttribute, attribute, value, vertex); |     Inst(Opcode::SetAttribute, attribute, value, vertex); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -74,6 +74,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); |     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | ||||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); |     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | ||||||
|  |     [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); | ||||||
|  |     [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); | ||||||
|     void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); |     void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); |     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | ||||||
|  |  | ||||||
|  | @ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||||
|     } |     } | ||||||
|     Optimization::SsaRewritePass(program); |     Optimization::SsaRewritePass(program); | ||||||
| 
 | 
 | ||||||
|     Optimization::ConstantPropagationPass(program); |     Optimization::ConstantPropagationPass(env, program); | ||||||
| 
 | 
 | ||||||
|     Optimization::PositionPass(env, program); |     Optimization::PositionPass(env, program); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| 
 | 
 | ||||||
| #include "common/bit_cast.h" | #include "common/bit_cast.h" | ||||||
|  | #include "shader_recompiler/environment.h" | ||||||
| #include "shader_recompiler/exception.h" | #include "shader_recompiler/exception.h" | ||||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||||
| #include "shader_recompiler/frontend/ir/value.h" | #include "shader_recompiler/frontend/ir/value.h" | ||||||
|  | @ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | ||||||
|             case IR::Attribute::PrimitiveId: |             case IR::Attribute::PrimitiveId: | ||||||
|             case IR::Attribute::InstanceId: |             case IR::Attribute::InstanceId: | ||||||
|             case IR::Attribute::VertexId: |             case IR::Attribute::VertexId: | ||||||
|  |             case IR::Attribute::BaseVertex: | ||||||
|  |             case IR::Attribute::BaseInstance: | ||||||
|                 break; |                 break; | ||||||
|             default: |             default: | ||||||
|                 return; |                 return; | ||||||
|  | @ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||||
|  |     const IR::Value bank{inst.Arg(0)}; | ||||||
|  |     const IR::Value offset{inst.Arg(1)}; | ||||||
|  |     if (!bank.IsImmediate() || !offset.IsImmediate()) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const auto bank_value = bank.U32(); | ||||||
|  |     const auto offset_value = offset.U32(); | ||||||
|  |     auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); | ||||||
|  |     if (!replacement) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const auto new_attribute = [replacement]() { | ||||||
|  |         switch (*replacement) { | ||||||
|  |         case ReplaceConstant::BaseInstance: | ||||||
|  |             return IR::Attribute::BaseInstance; | ||||||
|  |         case ReplaceConstant::BaseVertex: | ||||||
|  |             return IR::Attribute::BaseVertex; | ||||||
|  |         default: | ||||||
|  |             throw NotImplementedException("Not implemented replacement variable {}", *replacement); | ||||||
|  |         } | ||||||
|  |     }(); | ||||||
|  |     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||||
|  |     if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { | ||||||
|  |         inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); | ||||||
|  |     } else { | ||||||
|  |         inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||||
|     switch (inst.GetOpcode()) { |     switch (inst.GetOpcode()) { | ||||||
|     case IR::Opcode::GetRegister: |     case IR::Opcode::GetRegister: | ||||||
|         return FoldGetRegister(inst); |         return FoldGetRegister(inst); | ||||||
|  | @ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||||
|                                     IR::Opcode::CompositeInsertF16x4); |                                     IR::Opcode::CompositeInsertF16x4); | ||||||
|     case IR::Opcode::FSwizzleAdd: |     case IR::Opcode::FSwizzleAdd: | ||||||
|         return FoldFSwizzleAdd(block, inst); |         return FoldFSwizzleAdd(block, inst); | ||||||
|  |     case IR::Opcode::GetCbufF32: | ||||||
|  |     case IR::Opcode::GetCbufU32: | ||||||
|  |         if (env.HasHLEMacroState()) { | ||||||
|  |             return FoldConstBuffer(env, block, inst); | ||||||
|  |         } | ||||||
|  |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| void ConstantPropagationPass(IR::Program& program) { | void ConstantPropagationPass(Environment& env, IR::Program& program) { | ||||||
|     const auto end{program.post_order_blocks.rend()}; |     const auto end{program.post_order_blocks.rend()}; | ||||||
|     for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { |     for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||||||
|         IR::Block* const block{*it}; |         IR::Block* const block{*it}; | ||||||
|         for (IR::Inst& inst : block->Instructions()) { |         for (IR::Inst& inst : block->Instructions()) { | ||||||
|             ConstantPropagation(*block, inst); |             ConstantPropagation(env, *block, inst); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -13,7 +13,7 @@ struct HostTranslateInfo; | ||||||
| namespace Shader::Optimization { | namespace Shader::Optimization { | ||||||
| 
 | 
 | ||||||
| void CollectShaderInfoPass(Environment& env, IR::Program& program); | void CollectShaderInfoPass(Environment& env, IR::Program& program); | ||||||
| void ConstantPropagationPass(IR::Program& program); | void ConstantPropagationPass(Environment& env, IR::Program& program); | ||||||
| void DeadCodeEliminationPass(IR::Program& program); | void DeadCodeEliminationPass(IR::Program& program); | ||||||
| void GlobalMemoryToStorageBufferPass(IR::Program& program); | void GlobalMemoryToStorageBufferPass(IR::Program& program); | ||||||
| void IdentityRemovalPass(IR::Program& program); | void IdentityRemovalPass(IR::Program& program); | ||||||
|  |  | ||||||
|  | @ -16,6 +16,11 @@ | ||||||
| 
 | 
 | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
|  | enum class ReplaceConstant : u32 { | ||||||
|  |     BaseInstance, | ||||||
|  |     BaseVertex, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| enum class TextureType : u32 { | enum class TextureType : u32 { | ||||||
|     Color1D, |     Color1D, | ||||||
|     ColorArray1D, |     ColorArray1D, | ||||||
|  |  | ||||||
|  | @ -11,7 +11,7 @@ | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
| struct VaryingState { | struct VaryingState { | ||||||
|     std::bitset<256> mask{}; |     std::bitset<512> mask{}; | ||||||
| 
 | 
 | ||||||
|     void Set(IR::Attribute attribute, bool state = true) { |     void Set(IR::Attribute attribute, bool state = true) { | ||||||
|         mask[static_cast<size_t>(attribute)] = state; |         mask[static_cast<size_t>(attribute)] = state; | ||||||
|  |  | ||||||
|  | @ -182,7 +182,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() { | ||||||
| size_t Maxwell3D::EstimateIndexBufferSize() { | size_t Maxwell3D::EstimateIndexBufferSize() { | ||||||
|     GPUVAddr start_address = regs.index_buffer.StartAddress(); |     GPUVAddr start_address = regs.index_buffer.StartAddress(); | ||||||
|     GPUVAddr end_address = regs.index_buffer.EndAddress(); |     GPUVAddr end_address = regs.index_buffer.EndAddress(); | ||||||
|     return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address), |     constexpr std::array<size_t, 4> max_sizes = { | ||||||
|  |         std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), | ||||||
|  |         std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; | ||||||
|  |     const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); | ||||||
|  |     return std::min<size_t>( | ||||||
|  |         memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / | ||||||
|  |             byte_size, | ||||||
|         static_cast<size_t>(end_address - start_address)); |         static_cast<size_t>(end_address - start_address)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { | ||||||
|     return regs.reg_array[method]; |     return regs.reg_array[method]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { | ||||||
|  |     const u64 key = (static_cast<u64>(bank) << 32) | offset; | ||||||
|  |     replace_table.emplace(key, name); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Engines
 | } // namespace Tegra::Engines
 | ||||||
|  |  | ||||||
|  | @ -3020,6 +3020,23 @@ public: | ||||||
|     /// Store temporary hw register values, used by some calls to restore state after a operation
 |     /// Store temporary hw register values, used by some calls to restore state after a operation
 | ||||||
|     Regs shadow_state; |     Regs shadow_state; | ||||||
| 
 | 
 | ||||||
|  |     // None Engine
 | ||||||
|  |     enum class EngineHint : u32 { | ||||||
|  |         None = 0x0, | ||||||
|  |         OnHLEMacro = 0x1, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     EngineHint engine_state{EngineHint::None}; | ||||||
|  | 
 | ||||||
|  |     enum class HLEReplaceName : u32 { | ||||||
|  |         BaseVertex = 0x0, | ||||||
|  |         BaseInstance = 0x1, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); | ||||||
|  | 
 | ||||||
|  |     std::unordered_map<u64, HLEReplaceName> replace_table; | ||||||
|  | 
 | ||||||
|     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); |     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); | ||||||
|     static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); |     static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -14,26 +14,29 @@ | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | 
 | ||||||
|  | using Maxwell = Engines::Maxwell3D; | ||||||
|  | 
 | ||||||
| namespace { | namespace { | ||||||
| 
 | 
 | ||||||
| bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { | ||||||
|     switch (topology) { |     switch (topology) { | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: |     case Maxwell::Regs::PrimitiveTopology::Points: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: |     case Maxwell::Regs::PrimitiveTopology::Lines: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: |     case Maxwell::Regs::PrimitiveTopology::LineLoop: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: |     case Maxwell::Regs::PrimitiveTopology::LineStrip: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: |     case Maxwell::Regs::PrimitiveTopology::Triangles: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: |     case Maxwell::Regs::PrimitiveTopology::TriangleStrip: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: |     case Maxwell::Regs::PrimitiveTopology::TriangleFan: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: |     case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: |     case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: |     case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: |     case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: |     case Maxwell::Regs::PrimitiveTopology::Patches: | ||||||
|         return true; |         return true; | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: |     case Maxwell::Regs::PrimitiveTopology::Quads: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: |     case Maxwell::Regs::PrimitiveTopology::QuadStrip: | ||||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: |     case Maxwell::Regs::PrimitiveTopology::Polygon: | ||||||
|     default: |     default: | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  | @ -82,8 +85,7 @@ public: | ||||||
|         : HLEMacroImpl(maxwell3d_), extended(extended_) {} |         : HLEMacroImpl(maxwell3d_), extended(extended_) {} | ||||||
| 
 | 
 | ||||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|         auto topology = |         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |  | ||||||
|         if (!IsTopologySafe(topology)) { |         if (!IsTopologySafe(topology)) { | ||||||
|             Fallback(parameters); |             Fallback(parameters); | ||||||
|             return; |             return; | ||||||
|  | @ -99,18 +101,16 @@ public: | ||||||
|         params.stride = 0; |         params.stride = 0; | ||||||
| 
 | 
 | ||||||
|         if (extended) { |         if (extended) { | ||||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); |             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|             maxwell3d.CallMethod(0x8e4, parameters[4], true); |             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         maxwell3d.draw_manager->DrawArrayIndirect(topology); |         maxwell3d.draw_manager->DrawArrayIndirect(topology); | ||||||
| 
 | 
 | ||||||
|         if (extended) { |         if (extended) { | ||||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); |             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|             maxwell3d.CallMethod(0x8e4, 0, true); |             maxwell3d.replace_table.clear(); | ||||||
|         } |         } | ||||||
|         maxwell3d.regs.vertex_buffer.first = 0; |  | ||||||
|         maxwell3d.regs.vertex_buffer.count = 0; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -134,13 +134,18 @@ private: | ||||||
| 
 | 
 | ||||||
|         const u32 base_instance = parameters[4]; |         const u32 base_instance = parameters[4]; | ||||||
|         if (extended) { |         if (extended) { | ||||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); |             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|             maxwell3d.CallMethod(0x8e4, base_instance, true); |             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         maxwell3d.draw_manager->DrawArray( |         maxwell3d.draw_manager->DrawArray( | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), |             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | ||||||
|             vertex_first, vertex_count, base_instance, instance_count); |             vertex_first, vertex_count, base_instance, instance_count); | ||||||
|  | 
 | ||||||
|  |         if (extended) { | ||||||
|  |             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|  |             maxwell3d.replace_table.clear(); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool extended; |     bool extended; | ||||||
|  | @ -151,8 +156,7 @@ public: | ||||||
|     explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} |     explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
| 
 | 
 | ||||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|         auto topology = |         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); |  | ||||||
|         if (!IsTopologySafe(topology)) { |         if (!IsTopologySafe(topology)) { | ||||||
|             Fallback(parameters); |             Fallback(parameters); | ||||||
|             return; |             return; | ||||||
|  | @ -164,16 +168,12 @@ public: | ||||||
|             minimum_limit = std::max(parameters[3], minimum_limit); |             minimum_limit = std::max(parameters[3], minimum_limit); | ||||||
|         } |         } | ||||||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||||
|         const u32 base_size = std::max(minimum_limit, estimate); |         const u32 base_size = std::max<u32>(minimum_limit, estimate); | ||||||
|         const u32 element_base = parameters[4]; |  | ||||||
|         const u32 base_instance = parameters[5]; |  | ||||||
|         maxwell3d.regs.index_buffer.first = 0; |  | ||||||
|         maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping
 |  | ||||||
|         maxwell3d.regs.draw.topology.Assign(topology); |         maxwell3d.regs.draw.topology.Assign(topology); | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); |         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|         maxwell3d.CallMethod(0x8e4, element_base, true); |         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|         maxwell3d.CallMethod(0x8e5, base_instance, true); |         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); |         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||||
|         params.is_indexed = true; |         params.is_indexed = true; | ||||||
|         params.include_count = false; |         params.include_count = false; | ||||||
|  | @ -184,9 +184,8 @@ public: | ||||||
|         params.stride = 0; |         params.stride = 0; | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); |         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); |         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|         maxwell3d.CallMethod(0x8e4, 0x0, true); |         maxwell3d.replace_table.clear(); | ||||||
|         maxwell3d.CallMethod(0x8e5, 0x0, true); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -197,18 +196,17 @@ private: | ||||||
|         const u32 base_instance = parameters[5]; |         const u32 base_instance = parameters[5]; | ||||||
|         maxwell3d.regs.vertex_id_base = element_base; |         maxwell3d.regs.vertex_id_base = element_base; | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); |         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|         maxwell3d.CallMethod(0x8e4, element_base, true); |         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|         maxwell3d.CallMethod(0x8e5, base_instance, true); |         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||||
| 
 | 
 | ||||||
|         maxwell3d.draw_manager->DrawIndex( |         maxwell3d.draw_manager->DrawIndex( | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), |             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | ||||||
|             parameters[3], parameters[1], element_base, base_instance, instance_count); |             parameters[3], parameters[1], element_base, base_instance, instance_count); | ||||||
| 
 | 
 | ||||||
|         maxwell3d.regs.vertex_id_base = 0x0; |         maxwell3d.regs.vertex_id_base = 0x0; | ||||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); |         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|         maxwell3d.CallMethod(0x8e4, 0x0, true); |         maxwell3d.replace_table.clear(); | ||||||
|         maxwell3d.CallMethod(0x8e5, 0x0, true); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u32 minimum_limit{1 << 18}; |     u32 minimum_limit{1 << 18}; | ||||||
|  | @ -238,8 +236,7 @@ public: | ||||||
|         : HLEMacroImpl(maxwell3d_) {} |         : HLEMacroImpl(maxwell3d_) {} | ||||||
| 
 | 
 | ||||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|         const auto topology = |         const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |  | ||||||
|         if (!IsTopologySafe(topology)) { |         if (!IsTopologySafe(topology)) { | ||||||
|             Fallback(parameters); |             Fallback(parameters); | ||||||
|             return; |             return; | ||||||
|  | @ -277,9 +274,6 @@ public: | ||||||
|         } |         } | ||||||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||||
|         const u32 base_size = std::max(minimum_limit, estimate); |         const u32 base_size = std::max(minimum_limit, estimate); | ||||||
| 
 |  | ||||||
|         maxwell3d.regs.index_buffer.first = 0; |  | ||||||
|         maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size); |  | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); |         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||||
|         params.is_indexed = true; |         params.is_indexed = true; | ||||||
|  | @ -290,7 +284,12 @@ public: | ||||||
|         params.max_draw_counts = draw_count; |         params.max_draw_counts = draw_count; | ||||||
|         params.stride = stride; |         params.stride = stride; | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); |         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|  |         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|  |         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||||
|  |         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||||||
|  |         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|  |         maxwell3d.replace_table.clear(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -299,9 +298,8 @@ private: | ||||||
|             // Clean everything.
 |             // Clean everything.
 | ||||||
|             // Clean everything.
 |             // Clean everything.
 | ||||||
|             maxwell3d.regs.vertex_id_base = 0x0; |             maxwell3d.regs.vertex_id_base = 0x0; | ||||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); |             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|             maxwell3d.CallMethod(0x8e4, 0x0, true); |             maxwell3d.replace_table.clear(); | ||||||
|             maxwell3d.CallMethod(0x8e5, 0x0, true); |  | ||||||
|         }); |         }); | ||||||
|         maxwell3d.RefreshParameters(); |         maxwell3d.RefreshParameters(); | ||||||
|         const u32 start_indirect = parameters[0]; |         const u32 start_indirect = parameters[0]; | ||||||
|  | @ -310,8 +308,7 @@ private: | ||||||
|             // Nothing to do.
 |             // Nothing to do.
 | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         const auto topology = |         const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); | ||||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); |  | ||||||
|         maxwell3d.regs.draw.topology.Assign(topology); |         maxwell3d.regs.draw.topology.Assign(topology); | ||||||
|         const u32 padding = parameters[3]; |         const u32 padding = parameters[3]; | ||||||
|         const std::size_t max_draws = parameters[4]; |         const std::size_t max_draws = parameters[4]; | ||||||
|  | @ -326,9 +323,9 @@ private: | ||||||
|             const u32 base_vertex = parameters[base + 3]; |             const u32 base_vertex = parameters[base + 3]; | ||||||
|             const u32 base_instance = parameters[base + 4]; |             const u32 base_instance = parameters[base + 4]; | ||||||
|             maxwell3d.regs.vertex_id_base = base_vertex; |             maxwell3d.regs.vertex_id_base = base_vertex; | ||||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); |             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|             maxwell3d.CallMethod(0x8e4, base_vertex, true); |             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|             maxwell3d.CallMethod(0x8e5, base_instance, true); |             maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||||
|             maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |             maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|             maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], |             maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], | ||||||
|                                               base_vertex, base_instance, parameters[base + 1]); |                                               base_vertex, base_instance, parameters[base + 1]); | ||||||
|  |  | ||||||
|  | @ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { | ||||||
|     return range_so_far; |     return range_so_far; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { | ||||||
|     PTEKind base_kind = GetPageKind(gpu_addr); |     PTEKind base_kind = GetPageKind(gpu_addr); | ||||||
|     if (base_kind == PTEKind::INVALID) { |     if (base_kind == PTEKind::INVALID) { | ||||||
|         return 0; |         return 0; | ||||||
|  | @ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         range_so_far += copy_amount; |         range_so_far += copy_amount; | ||||||
|  |         if (range_so_far >= max_size) { | ||||||
|  |             result = true; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|  | @ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         range_so_far += copy_amount; |         range_so_far += copy_amount; | ||||||
|  |         if (range_so_far >= max_size) { | ||||||
|  |             result = true; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|  |  | ||||||
|  | @ -118,7 +118,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     PTEKind GetPageKind(GPUVAddr gpu_addr) const; |     PTEKind GetPageKind(GPUVAddr gpu_addr) const; | ||||||
| 
 | 
 | ||||||
|     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; |     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, | ||||||
|  |                                size_t max_size = std::numeric_limits<size_t>::max()) const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | ||||||
|  |  | ||||||
|  | @ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | ||||||
|     smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); |     smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); | ||||||
|     alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); |     alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); | ||||||
|     alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); |     alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); | ||||||
|  |     app_stage.Assign(maxwell3d.engine_state); | ||||||
| 
 | 
 | ||||||
|     for (size_t i = 0; i < regs.rt.size(); ++i) { |     for (size_t i = 0; i < regs.rt.size(); ++i) { | ||||||
|         color_formats[i] = static_cast<u8>(regs.rt[i].format); |         color_formats[i] = static_cast<u8>(regs.rt[i].format); | ||||||
|  |  | ||||||
|  | @ -197,6 +197,7 @@ struct FixedPipelineState { | ||||||
|         BitField<14, 1, u32> smooth_lines; |         BitField<14, 1, u32> smooth_lines; | ||||||
|         BitField<15, 1, u32> alpha_to_coverage_enabled; |         BitField<15, 1, u32> alpha_to_coverage_enabled; | ||||||
|         BitField<16, 1, u32> alpha_to_one_enabled; |         BitField<16, 1, u32> alpha_to_one_enabled; | ||||||
|  |         BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage; | ||||||
|     }; |     }; | ||||||
|     std::array<u8, Maxwell::NumRenderTargets> color_formats; |     std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; | ||||||
| using VideoCommon::GenericEnvironment; | using VideoCommon::GenericEnvironment; | ||||||
| using VideoCommon::GraphicsEnvironment; | using VideoCommon::GraphicsEnvironment; | ||||||
| 
 | 
 | ||||||
| constexpr u32 CACHE_VERSION = 8; | constexpr u32 CACHE_VERSION = 9; | ||||||
| 
 | 
 | ||||||
| template <typename Container> | template <typename Container> | ||||||
| auto MakeSpan(Container& container) { | auto MakeSpan(Container& container) { | ||||||
|  |  | ||||||
|  | @ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||||
|     const u64 num_texture_types{static_cast<u64>(texture_types.size())}; |     const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||||||
|     const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; |     const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; | ||||||
|     const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; |     const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||||||
|  |     const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())}; | ||||||
| 
 | 
 | ||||||
|     file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) |     file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||||||
|         .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) |         .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||||||
|         .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), |         .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), | ||||||
|                sizeof(num_texture_pixel_formats)) |                sizeof(num_texture_pixel_formats)) | ||||||
|         .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) |         .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||||
|  |         .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values), | ||||||
|  |                sizeof(num_cbuf_replacement_values)) | ||||||
|         .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) |         .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||||||
|         .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) |         .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||||||
|         .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) |         .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | ||||||
|  | @ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) |         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||||
|             .write(reinterpret_cast<const char*>(&type), sizeof(type)); |             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||||
|     } |     } | ||||||
|  |     for (const auto& [key, type] : cbuf_replacements) { | ||||||
|  |         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||||
|  |             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||||
|  |     } | ||||||
|     if (stage == Shader::Stage::Compute) { |     if (stage == Shader::Stage::Compute) { | ||||||
|         file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) |         file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||||||
|             .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); |             .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||||||
|  | @ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||||||
|     ASSERT(local_size <= std::numeric_limits<u32>::max()); |     ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||||||
|     local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; |     local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; | ||||||
|     texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; |     texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; | ||||||
|  |     has_hle_engine_state = | ||||||
|  |         maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||||
|  | @ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||||
|     return value; |     return value; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank, | ||||||
|  |                                                                                   u32 offset) { | ||||||
|  |     if (!has_hle_engine_state) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||||||
|  |     auto it = maxwell3d->replace_table.find(key); | ||||||
|  |     if (it == maxwell3d->replace_table.end()) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { | ||||||
|  |         switch (name) { | ||||||
|  |         case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: | ||||||
|  |             return Shader::ReplaceConstant::BaseVertex; | ||||||
|  |         case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: | ||||||
|  |             return Shader::ReplaceConstant::BaseInstance; | ||||||
|  |         default: | ||||||
|  |             UNREACHABLE(); | ||||||
|  |         } | ||||||
|  |     }(it->second); | ||||||
|  |     cbuf_replacements.emplace(key, converted_value); | ||||||
|  |     return converted_value; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | ||||||
|     const auto& regs{maxwell3d->regs}; |     const auto& regs{maxwell3d->regs}; | ||||||
|     const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; |     const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; | ||||||
|  | @ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | ||||||
|     u64 num_texture_types{}; |     u64 num_texture_types{}; | ||||||
|     u64 num_texture_pixel_formats{}; |     u64 num_texture_pixel_formats{}; | ||||||
|     u64 num_cbuf_values{}; |     u64 num_cbuf_values{}; | ||||||
|  |     u64 num_cbuf_replacement_values{}; | ||||||
|     file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) |     file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||||||
|         .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) |         .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||||||
|         .read(reinterpret_cast<char*>(&num_texture_pixel_formats), |         .read(reinterpret_cast<char*>(&num_texture_pixel_formats), | ||||||
|               sizeof(num_texture_pixel_formats)) |               sizeof(num_texture_pixel_formats)) | ||||||
|         .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) |         .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||||
|  |         .read(reinterpret_cast<char*>(&num_cbuf_replacement_values), | ||||||
|  |               sizeof(num_cbuf_replacement_values)) | ||||||
|         .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) |         .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||||||
|         .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) |         .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||||||
|         .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) |         .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | ||||||
|  | @ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | ||||||
|             .read(reinterpret_cast<char*>(&value), sizeof(value)); |             .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||||||
|         cbuf_values.emplace(key, value); |         cbuf_values.emplace(key, value); | ||||||
|     } |     } | ||||||
|  |     for (size_t i = 0; i < num_cbuf_replacement_values; ++i) { | ||||||
|  |         u64 key; | ||||||
|  |         Shader::ReplaceConstant value; | ||||||
|  |         file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||||||
|  |             .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||||||
|  |         cbuf_replacements.emplace(key, value); | ||||||
|  |     } | ||||||
|     if (stage == Shader::Stage::Compute) { |     if (stage == Shader::Stage::Compute) { | ||||||
|         file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) |         file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | ||||||
|             .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); |             .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||||||
|  | @ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const { | ||||||
|     return workgroup_size; |     return workgroup_size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank, | ||||||
|  |                                                                               u32 offset) { | ||||||
|  |     const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||||||
|  |     auto it = cbuf_replacements.find(key); | ||||||
|  |     if (it == cbuf_replacements.end()) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     return it->second; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||||||
|                        const std::filesystem::path& filename, u32 cache_version) try { |                        const std::filesystem::path& filename, u32 cache_version) try { | ||||||
|     std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); |     std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); | ||||||
|  |  | ||||||
|  | @ -60,6 +60,10 @@ public: | ||||||
| 
 | 
 | ||||||
|     void Serialize(std::ofstream& file) const; |     void Serialize(std::ofstream& file) const; | ||||||
| 
 | 
 | ||||||
|  |     bool HasHLEMacroState() const override { | ||||||
|  |         return has_hle_engine_state; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| protected: | protected: | ||||||
|     std::optional<u64> TryFindSize(); |     std::optional<u64> TryFindSize(); | ||||||
| 
 | 
 | ||||||
|  | @ -73,6 +77,7 @@ protected: | ||||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; |     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; |     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||||
|     std::unordered_map<u64, u32> cbuf_values; |     std::unordered_map<u64, u32> cbuf_values; | ||||||
|  |     std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||||||
| 
 | 
 | ||||||
|     u32 local_memory_size{}; |     u32 local_memory_size{}; | ||||||
|     u32 texture_bound{}; |     u32 texture_bound{}; | ||||||
|  | @ -89,6 +94,7 @@ protected: | ||||||
|     u32 viewport_transform_state = 1; |     u32 viewport_transform_state = 1; | ||||||
| 
 | 
 | ||||||
|     bool has_unbound_instructions = false; |     bool has_unbound_instructions = false; | ||||||
|  |     bool has_hle_engine_state = false; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class GraphicsEnvironment final : public GenericEnvironment { | class GraphicsEnvironment final : public GenericEnvironment { | ||||||
|  | @ -109,6 +115,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     u32 ReadViewportTransformState() override; |     u32 ReadViewportTransformState() override; | ||||||
| 
 | 
 | ||||||
|  |     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     Tegra::Engines::Maxwell3D* maxwell3d{}; |     Tegra::Engines::Maxwell3D* maxwell3d{}; | ||||||
|     size_t stage_index{}; |     size_t stage_index{}; | ||||||
|  | @ -131,6 +139,11 @@ public: | ||||||
| 
 | 
 | ||||||
|     u32 ReadViewportTransformState() override; |     u32 ReadViewportTransformState() override; | ||||||
| 
 | 
 | ||||||
|  |     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer( | ||||||
|  |         [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     Tegra::Engines::KeplerCompute* kepler_compute{}; |     Tegra::Engines::KeplerCompute* kepler_compute{}; | ||||||
| }; | }; | ||||||
|  | @ -166,6 +179,13 @@ public: | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; |     [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, | ||||||
|  |                                                                                u32 offset) override; | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] bool HasHLEMacroState() const override { | ||||||
|  |         return cbuf_replacements.size() != 0; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void Dump(u64 hash) override; |     void Dump(u64 hash) override; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -173,6 +193,7 @@ private: | ||||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; |     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; |     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||||
|     std::unordered_map<u64, u32> cbuf_values; |     std::unordered_map<u64, u32> cbuf_values; | ||||||
|  |     std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||||||
|     std::array<u32, 3> workgroup_size{}; |     std::array<u32, 3> workgroup_size{}; | ||||||
|     u32 local_memory_size{}; |     u32 local_memory_size{}; | ||||||
|     u32 shared_memory_size{}; |     u32 shared_memory_size{}; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow