forked from eden-emu/eden
		
	shader: Constant propagation and global memory to storage buffer
This commit is contained in:
		
							parent
							
								
									f5605b424f
								
							
						
					
					
						commit
						37fc39de17
					
				
					 17 changed files with 652 additions and 63 deletions
				
			
		
							
								
								
									
										146
									
								
								src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										146
									
								
								src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,146 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <type_traits> | ||||
| 
 | ||||
| #include "common/bit_util.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| namespace { | ||||
| [[nodiscard]] u32 BitFieldUExtract(u32 base, u32 shift, u32 count) { | ||||
|     if (static_cast<size_t>(shift) + static_cast<size_t>(count) > Common::BitSize<u32>()) { | ||||
|         throw LogicError("Undefined result in BitFieldUExtract({}, {}, {})", base, shift, count); | ||||
|     } | ||||
|     return (base >> shift) & ((1U << count) - 1); | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| [[nodiscard]] T Arg(const IR::Value& value) { | ||||
|     if constexpr (std::is_same_v<T, bool>) { | ||||
|         return value.U1(); | ||||
|     } else if constexpr (std::is_same_v<T, u32>) { | ||||
|         return value.U32(); | ||||
|     } else if constexpr (std::is_same_v<T, u64>) { | ||||
|         return value.U64(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename ImmFn> | ||||
| bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { | ||||
|     const auto arg = [](const IR::Value& value) { | ||||
|         if constexpr (std::is_invocable_r_v<bool, ImmFn, bool, bool>) { | ||||
|             return value.U1(); | ||||
|         } else if constexpr (std::is_invocable_r_v<u32, ImmFn, u32, u32>) { | ||||
|             return value.U32(); | ||||
|         } else if constexpr (std::is_invocable_r_v<u64, ImmFn, u64, u64>) { | ||||
|             return value.U64(); | ||||
|         } | ||||
|     }; | ||||
|     const IR::Value lhs{inst.Arg(0)}; | ||||
|     const IR::Value rhs{inst.Arg(1)}; | ||||
| 
 | ||||
|     const bool is_lhs_immediate{lhs.IsImmediate()}; | ||||
|     const bool is_rhs_immediate{rhs.IsImmediate()}; | ||||
| 
 | ||||
|     if (is_lhs_immediate && is_rhs_immediate) { | ||||
|         const auto result{imm_fn(arg(lhs), arg(rhs))}; | ||||
|         inst.ReplaceUsesWith(IR::Value{result}); | ||||
|         return false; | ||||
|     } | ||||
|     if (is_lhs_immediate && !is_rhs_immediate) { | ||||
|         IR::Inst* const rhs_inst{rhs.InstRecursive()}; | ||||
|         if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { | ||||
|             const auto combined{imm_fn(arg(lhs), arg(rhs_inst->Arg(1)))}; | ||||
|             inst.SetArg(0, rhs_inst->Arg(0)); | ||||
|             inst.SetArg(1, IR::Value{combined}); | ||||
|         } else { | ||||
|             // Normalize
 | ||||
|             inst.SetArg(0, rhs); | ||||
|             inst.SetArg(1, lhs); | ||||
|         } | ||||
|     } | ||||
|     if (!is_lhs_immediate && is_rhs_immediate) { | ||||
|         const IR::Inst* const lhs_inst{lhs.InstRecursive()}; | ||||
|         if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { | ||||
|             const auto combined{imm_fn(arg(rhs), arg(lhs_inst->Arg(1)))}; | ||||
|             inst.SetArg(0, lhs_inst->Arg(0)); | ||||
|             inst.SetArg(1, IR::Value{combined}); | ||||
|         } | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| void FoldGetRegister(IR::Inst& inst) { | ||||
|     if (inst.Arg(0).Reg() == IR::Reg::RZ) { | ||||
|         inst.ReplaceUsesWith(IR::Value{u32{0}}); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void FoldGetPred(IR::Inst& inst) { | ||||
|     if (inst.Arg(0).Pred() == IR::Pred::PT) { | ||||
|         inst.ReplaceUsesWith(IR::Value{true}); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| void FoldAdd(IR::Inst& inst) { | ||||
|     if (inst.HasAssociatedPseudoOperation()) { | ||||
|         return; | ||||
|     } | ||||
|     if (!FoldCommutative(inst, [](T a, T b) { return a + b; })) { | ||||
|         return; | ||||
|     } | ||||
|     const IR::Value rhs{inst.Arg(1)}; | ||||
|     if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { | ||||
|         inst.ReplaceUsesWith(inst.Arg(0)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void FoldLogicalAnd(IR::Inst& inst) { | ||||
|     if (!FoldCommutative(inst, [](bool a, bool b) { return a && b; })) { | ||||
|         return; | ||||
|     } | ||||
|     const IR::Value rhs{inst.Arg(1)}; | ||||
|     if (rhs.IsImmediate()) { | ||||
|         if (rhs.U1()) { | ||||
|             inst.ReplaceUsesWith(inst.Arg(0)); | ||||
|         } else { | ||||
|             inst.ReplaceUsesWith(IR::Value{false}); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ConstantPropagation(IR::Inst& inst) { | ||||
|     switch (inst.Opcode()) { | ||||
|     case IR::Opcode::GetRegister: | ||||
|         return FoldGetRegister(inst); | ||||
|     case IR::Opcode::GetPred: | ||||
|         return FoldGetPred(inst); | ||||
|     case IR::Opcode::IAdd32: | ||||
|         return FoldAdd<u32>(inst); | ||||
|     case IR::Opcode::IAdd64: | ||||
|         return FoldAdd<u64>(inst); | ||||
|     case IR::Opcode::BitFieldUExtract: | ||||
|         if (inst.AreAllArgsImmediates() && !inst.HasAssociatedPseudoOperation()) { | ||||
|             inst.ReplaceUsesWith(IR::Value{ | ||||
|                 BitFieldUExtract(inst.Arg(0).U32(), inst.Arg(1).U32(), inst.Arg(2).U32())}); | ||||
|         } | ||||
|         break; | ||||
|     case IR::Opcode::LogicalAnd: | ||||
|         return FoldLogicalAnd(inst); | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void ConstantPropagationPass(IR::Block& block) { | ||||
|     std::ranges::for_each(block, ConstantPropagation); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
|  | @ -0,0 +1,331 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <compare> | ||||
| #include <optional> | ||||
| #include <ranges> | ||||
| 
 | ||||
| #include <boost/container/flat_set.hpp> | ||||
| #include <boost/container/small_vector.hpp> | ||||
| 
 | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| namespace { | ||||
| /// Address in constant buffers to the storage buffer descriptor
 | ||||
| struct StorageBufferAddr { | ||||
|     auto operator<=>(const StorageBufferAddr&) const noexcept = default; | ||||
| 
 | ||||
|     u32 index; | ||||
|     u32 offset; | ||||
| }; | ||||
| 
 | ||||
| /// Block iterator to a global memory instruction and the storage buffer it uses
 | ||||
| struct StorageInst { | ||||
|     StorageBufferAddr storage_buffer; | ||||
|     IR::Block::iterator inst; | ||||
| }; | ||||
| 
 | ||||
| /// Bias towards a certain range of constant buffers when looking for storage buffers
 | ||||
| struct Bias { | ||||
|     u32 index; | ||||
|     u32 offset_begin; | ||||
|     u32 offset_end; | ||||
| }; | ||||
| 
 | ||||
| using StorageBufferSet = | ||||
|     boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, | ||||
|                                boost::container::small_vector<StorageBufferAddr, 16>>; | ||||
| using StorageInstVector = boost::container::small_vector<StorageInst, 32>; | ||||
| 
 | ||||
| /// Returns true when the instruction is a global memory instruction
 | ||||
| bool IsGlobalMemory(const IR::Inst& inst) { | ||||
|     switch (inst.Opcode()) { | ||||
|     case IR::Opcode::LoadGlobalS8: | ||||
|     case IR::Opcode::LoadGlobalU8: | ||||
|     case IR::Opcode::LoadGlobalS16: | ||||
|     case IR::Opcode::LoadGlobalU16: | ||||
|     case IR::Opcode::LoadGlobal32: | ||||
|     case IR::Opcode::LoadGlobal64: | ||||
|     case IR::Opcode::LoadGlobal128: | ||||
|     case IR::Opcode::WriteGlobalS8: | ||||
|     case IR::Opcode::WriteGlobalU8: | ||||
|     case IR::Opcode::WriteGlobalS16: | ||||
|     case IR::Opcode::WriteGlobalU16: | ||||
|     case IR::Opcode::WriteGlobal32: | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Converts a global memory opcode to its storage buffer equivalent
 | ||||
| IR::Opcode GlobalToStorage(IR::Opcode opcode) { | ||||
|     switch (opcode) { | ||||
|     case IR::Opcode::LoadGlobalS8: | ||||
|         return IR::Opcode::LoadStorageS8; | ||||
|     case IR::Opcode::LoadGlobalU8: | ||||
|         return IR::Opcode::LoadStorageU8; | ||||
|     case IR::Opcode::LoadGlobalS16: | ||||
|         return IR::Opcode::LoadStorageS16; | ||||
|     case IR::Opcode::LoadGlobalU16: | ||||
|         return IR::Opcode::LoadStorageU16; | ||||
|     case IR::Opcode::LoadGlobal32: | ||||
|         return IR::Opcode::LoadStorage32; | ||||
|     case IR::Opcode::LoadGlobal64: | ||||
|         return IR::Opcode::LoadStorage64; | ||||
|     case IR::Opcode::LoadGlobal128: | ||||
|         return IR::Opcode::LoadStorage128; | ||||
|     case IR::Opcode::WriteGlobalS8: | ||||
|         return IR::Opcode::WriteStorageS8; | ||||
|     case IR::Opcode::WriteGlobalU8: | ||||
|         return IR::Opcode::WriteStorageU8; | ||||
|     case IR::Opcode::WriteGlobalS16: | ||||
|         return IR::Opcode::WriteStorageS16; | ||||
|     case IR::Opcode::WriteGlobalU16: | ||||
|         return IR::Opcode::WriteStorageU16; | ||||
|     case IR::Opcode::WriteGlobal32: | ||||
|         return IR::Opcode::WriteStorage32; | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|         return IR::Opcode::WriteStorage64; | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         return IR::Opcode::WriteStorage128; | ||||
|     default: | ||||
|         throw InvalidArgument("Invalid global memory opcode {}", opcode); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Returns true when a storage buffer address satisfies a bias
 | ||||
| bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { | ||||
|     return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && | ||||
|            storage_buffer.offset < bias.offset_end; | ||||
| } | ||||
| 
 | ||||
| /// Ignores a global memory operation, reads return zero and writes are ignored
 | ||||
| void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { | ||||
|     const IR::Value zero{u32{0}}; | ||||
|     switch (inst->Opcode()) { | ||||
|     case IR::Opcode::LoadGlobalS8: | ||||
|     case IR::Opcode::LoadGlobalU8: | ||||
|     case IR::Opcode::LoadGlobalS16: | ||||
|     case IR::Opcode::LoadGlobalU16: | ||||
|     case IR::Opcode::LoadGlobal32: | ||||
|         inst->ReplaceUsesWith(zero); | ||||
|         break; | ||||
|     case IR::Opcode::LoadGlobal64: | ||||
|         inst->ReplaceUsesWith( | ||||
|             IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); | ||||
|         break; | ||||
|     case IR::Opcode::LoadGlobal128: | ||||
|         inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( | ||||
|             inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); | ||||
|         break; | ||||
|     case IR::Opcode::WriteGlobalS8: | ||||
|     case IR::Opcode::WriteGlobalU8: | ||||
|     case IR::Opcode::WriteGlobalS16: | ||||
|     case IR::Opcode::WriteGlobalU16: | ||||
|     case IR::Opcode::WriteGlobal32: | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         inst->Invalidate(); | ||||
|         break; | ||||
|     default: | ||||
|         throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// Recursively tries to track the storage buffer address used by a global memory instruction
 | ||||
| std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | ||||
|     if (value.IsImmediate()) { | ||||
|         // Immediates can't be a storage buffer
 | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     const IR::Inst* const inst{value.InstRecursive()}; | ||||
|     if (inst->Opcode() == IR::Opcode::GetCbuf) { | ||||
|         const IR::Value index{inst->Arg(0)}; | ||||
|         const IR::Value offset{inst->Arg(1)}; | ||||
|         if (!index.IsImmediate()) { | ||||
|             // Definitely not a storage buffer if it's read from a non-immediate index
 | ||||
|             return std::nullopt; | ||||
|         } | ||||
|         if (!offset.IsImmediate()) { | ||||
|             // TODO: Support SSBO arrays
 | ||||
|             return std::nullopt; | ||||
|         } | ||||
|         const StorageBufferAddr storage_buffer{ | ||||
|             .index = index.U32(), | ||||
|             .offset = offset.U32(), | ||||
|         }; | ||||
|         if (bias && !MeetsBias(storage_buffer, *bias)) { | ||||
|             // We have to blacklist some addresses in case we wrongly point to them
 | ||||
|             return std::nullopt; | ||||
|         } | ||||
|         return storage_buffer; | ||||
|     } | ||||
|     // Reversed loops are more likely to find the right result
 | ||||
|     for (size_t arg = inst->NumArgs(); arg--;) { | ||||
|         if (const std::optional storage_buffer{Track(inst->Arg(arg), bias)}) { | ||||
|             return *storage_buffer; | ||||
|         } | ||||
|     } | ||||
|     return std::nullopt; | ||||
| } | ||||
| 
 | ||||
| /// Collects the storage buffer used by a global memory instruction and the instruction itself
 | ||||
| void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, | ||||
|                            StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { | ||||
|     // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
 | ||||
|     // avoid getting false positives
 | ||||
|     static constexpr Bias nvn_bias{ | ||||
|         .index{0}, | ||||
|         .offset_begin{0x110}, | ||||
|         .offset_end{0x610}, | ||||
|     }; | ||||
|     // First try to find storage buffers in the NVN address
 | ||||
|     const IR::U64 addr{inst->Arg(0)}; | ||||
|     std::optional<StorageBufferAddr> storage_buffer{Track(addr, &nvn_bias)}; | ||||
|     if (!storage_buffer) { | ||||
|         // If it fails, track without a bias
 | ||||
|         storage_buffer = Track(addr, nullptr); | ||||
|         if (!storage_buffer) { | ||||
|             // If that also failed, drop the global memory usage
 | ||||
|             IgnoreGlobalMemory(block, inst); | ||||
|         } | ||||
|     } | ||||
|     // Collect storage buffer and the instruction
 | ||||
|     storage_buffer_set.insert(*storage_buffer); | ||||
|     to_replace.push_back(StorageInst{ | ||||
|         .storage_buffer{*storage_buffer}, | ||||
|         .inst{inst}, | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| /// Tries to track the first 32-bits of a global memory instruction
 | ||||
| std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) { | ||||
|     // The first argument is the low level GPU pointer to the global memory instruction
 | ||||
|     const IR::U64 addr{inst->Arg(0)}; | ||||
|     if (addr.IsImmediate()) { | ||||
|         // Not much we can do if it's an immediate
 | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     // This address is expected to either be a PackUint2x32 or a IAdd64
 | ||||
|     IR::Inst* addr_inst{addr.InstRecursive()}; | ||||
|     s32 imm_offset{0}; | ||||
|     if (addr_inst->Opcode() == IR::Opcode::IAdd64) { | ||||
|         // If it's an IAdd64, get the immediate offset it is applying and grab the address
 | ||||
|         // instruction. This expects for the instruction to be canonicalized having the address on
 | ||||
|         // the first argument and the immediate offset on the second one.
 | ||||
|         const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||||
|         if (!imm_offset_value.IsImmediate()) { | ||||
|             return std::nullopt; | ||||
|         } | ||||
|         imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||||
|         const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||||
|         if (iadd_addr.IsImmediate()) { | ||||
|             return std::nullopt; | ||||
|         } | ||||
|         addr_inst = iadd_addr.Inst(); | ||||
|     } | ||||
|     // With IAdd64 handled, now PackUint2x32 is expected without exceptions
 | ||||
|     if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     // PackUint2x32 is expected to be generated from a vector
 | ||||
|     const IR::Value vector{addr_inst->Arg(0)}; | ||||
|     if (vector.IsImmediate()) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     // This vector is expected to be a CompositeConstruct2
 | ||||
|     IR::Inst* const vector_inst{vector.InstRecursive()}; | ||||
|     if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     // Grab the first argument from the CompositeConstruct2, this is the low address.
 | ||||
|     // Re-apply the offset in case we found one.
 | ||||
|     const IR::U32 low_addr{vector_inst->Arg(0)}; | ||||
|     return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; | ||||
| } | ||||
| 
 | ||||
| /// Returns the offset in indices (not bytes) for an equivalent storage instruction
 | ||||
| IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { | ||||
|     IR::IREmitter ir{block, inst}; | ||||
|     IR::U32 offset; | ||||
|     if (const std::optional<IR::U32> low_addr{TrackLowAddress(ir, &*inst)}) { | ||||
|         offset = *low_addr; | ||||
|     } else { | ||||
|         offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); | ||||
|     } | ||||
|     // Subtract the least significant 32 bits from the guest offset. The result is the storage
 | ||||
|     // buffer offset in bytes.
 | ||||
|     const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | ||||
|     return ir.ISub(offset, low_cbuf); | ||||
| } | ||||
| 
 | ||||
| /// Replace a global memory load instruction with its storage buffer equivalent
 | ||||
| void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | ||||
|                  const IR::U32& offset) { | ||||
|     const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; | ||||
|     const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; | ||||
|     inst->ReplaceUsesWith(value); | ||||
| } | ||||
| 
 | ||||
| /// Replace a global memory write instruction with its storage buffer equivalent
 | ||||
| void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | ||||
|                   const IR::U32& offset) { | ||||
|     const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; | ||||
|     block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); | ||||
|     inst->Invalidate(); | ||||
| } | ||||
| 
 | ||||
| /// Replace a global memory instruction with its storage buffer equivalent
 | ||||
| void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, | ||||
|              const IR::U32& offset) { | ||||
|     switch (inst->Opcode()) { | ||||
|     case IR::Opcode::LoadGlobalS8: | ||||
|     case IR::Opcode::LoadGlobalU8: | ||||
|     case IR::Opcode::LoadGlobalS16: | ||||
|     case IR::Opcode::LoadGlobalU16: | ||||
|     case IR::Opcode::LoadGlobal32: | ||||
|     case IR::Opcode::LoadGlobal64: | ||||
|     case IR::Opcode::LoadGlobal128: | ||||
|         return ReplaceLoad(block, inst, storage_index, offset); | ||||
|     case IR::Opcode::WriteGlobalS8: | ||||
|     case IR::Opcode::WriteGlobalU8: | ||||
|     case IR::Opcode::WriteGlobalS16: | ||||
|     case IR::Opcode::WriteGlobalU16: | ||||
|     case IR::Opcode::WriteGlobal32: | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         return ReplaceWrite(block, inst, storage_index, offset); | ||||
|     default: | ||||
|         throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void GlobalMemoryToStorageBufferPass(IR::Block& block) { | ||||
|     StorageBufferSet storage_buffers; | ||||
|     StorageInstVector to_replace; | ||||
| 
 | ||||
|     for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { | ||||
|         if (!IsGlobalMemory(*inst)) { | ||||
|             continue; | ||||
|         } | ||||
|         CollectStorageBuffers(block, inst, storage_buffers, to_replace); | ||||
|     } | ||||
|     for (const auto [storage_buffer, inst] : to_replace) { | ||||
|         const auto it{storage_buffers.find(storage_buffer)}; | ||||
|         const IR::U32 storage_index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}}; | ||||
|         const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; | ||||
|         Replace(block, inst, storage_index, offset); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
|  | @ -10,22 +10,24 @@ | |||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| 
 | ||||
| void IdentityRemovalPass(IR::Block& block) { | ||||
| void IdentityRemovalPass(IR::Function& function) { | ||||
|     std::vector<IR::Inst*> to_invalidate; | ||||
| 
 | ||||
|     for (auto inst = block.begin(); inst != block.end();) { | ||||
|         const size_t num_args{inst->NumArgs()}; | ||||
|         for (size_t i = 0; i < num_args; ++i) { | ||||
|             IR::Value arg; | ||||
|             while ((arg = inst->Arg(i)).IsIdentity()) { | ||||
|                 inst->SetArg(i, arg.Inst()->Arg(0)); | ||||
|     for (auto& block : function.blocks) { | ||||
|         for (auto inst = block->begin(); inst != block->end();) { | ||||
|             const size_t num_args{inst->NumArgs()}; | ||||
|             for (size_t i = 0; i < num_args; ++i) { | ||||
|                 IR::Value arg; | ||||
|                 while ((arg = inst->Arg(i)).IsIdentity()) { | ||||
|                     inst->SetArg(i, arg.Inst()->Arg(0)); | ||||
|                 } | ||||
|             } | ||||
|             if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { | ||||
|                 to_invalidate.push_back(&*inst); | ||||
|                 inst = block->Instructions().erase(inst); | ||||
|             } else { | ||||
|                 ++inst; | ||||
|             } | ||||
|         } | ||||
|         if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { | ||||
|             to_invalidate.push_back(&*inst); | ||||
|             inst = block.Instructions().erase(inst); | ||||
|         } else { | ||||
|             ++inst; | ||||
|         } | ||||
|     } | ||||
|     for (IR::Inst* const inst : to_invalidate) { | ||||
|  |  | |||
|  | @ -16,9 +16,11 @@ void Invoke(Func&& func, IR::Function& function) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void ConstantPropagationPass(IR::Block& block); | ||||
| void DeadCodeEliminationPass(IR::Block& block); | ||||
| void IdentityRemovalPass(IR::Block& block); | ||||
| void GlobalMemoryToStorageBufferPass(IR::Block& block); | ||||
| void IdentityRemovalPass(IR::Function& function); | ||||
| void SsaRewritePass(IR::Function& function); | ||||
| void VerificationPass(const IR::Block& block); | ||||
| void VerificationPass(const IR::Function& function); | ||||
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
|  |  | |||
|  | @ -14,8 +14,6 @@ | |||
| //      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6
 | ||||
| //
 | ||||
| 
 | ||||
| #include <map> | ||||
| 
 | ||||
| #include <boost/container/flat_map.hpp> | ||||
| 
 | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
|  | @ -30,6 +28,12 @@ namespace Shader::Optimization { | |||
| namespace { | ||||
| using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; | ||||
| 
 | ||||
| struct FlagTag {}; | ||||
| struct ZeroFlagTag : FlagTag {}; | ||||
| struct SignFlagTag : FlagTag {}; | ||||
| struct CarryFlagTag : FlagTag {}; | ||||
| struct OverflowFlagTag : FlagTag {}; | ||||
| 
 | ||||
| struct DefTable { | ||||
|     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { | ||||
|         return regs[IR::RegIndex(variable)]; | ||||
|  | @ -39,8 +43,28 @@ struct DefTable { | |||
|         return preds[IR::PredIndex(variable)]; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { | ||||
|         return zero_flag; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] ValueMap& operator[](SignFlagTag) noexcept { | ||||
|         return sign_flag; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] ValueMap& operator[](CarryFlagTag) noexcept { | ||||
|         return carry_flag; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] ValueMap& operator[](OverflowFlagTag) noexcept { | ||||
|         return overflow_flag; | ||||
|     } | ||||
| 
 | ||||
|     std::array<ValueMap, IR::NUM_USER_REGS> regs; | ||||
|     std::array<ValueMap, IR::NUM_USER_PREDS> preds; | ||||
|     ValueMap zero_flag; | ||||
|     ValueMap sign_flag; | ||||
|     ValueMap carry_flag; | ||||
|     ValueMap overflow_flag; | ||||
| }; | ||||
| 
 | ||||
| IR::Opcode UndefOpcode(IR::Reg) noexcept { | ||||
|  | @ -51,6 +75,10 @@ IR::Opcode UndefOpcode(IR::Pred) noexcept { | |||
|     return IR::Opcode::Undef1; | ||||
| } | ||||
| 
 | ||||
| IR::Opcode UndefOpcode(const FlagTag&) noexcept { | ||||
|     return IR::Opcode::Undef1; | ||||
| } | ||||
| 
 | ||||
| [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { | ||||
|     return inst.Opcode() == IR::Opcode::Phi; | ||||
| } | ||||
|  | @ -135,6 +163,18 @@ void SsaRewritePass(IR::Function& function) { | |||
|                     pass.WriteVariable(pred, block.get(), inst.Arg(1)); | ||||
|                 } | ||||
|                 break; | ||||
|             case IR::Opcode::SetZFlag: | ||||
|                 pass.WriteVariable(ZeroFlagTag{}, block.get(), inst.Arg(0)); | ||||
|                 break; | ||||
|             case IR::Opcode::SetSFlag: | ||||
|                 pass.WriteVariable(SignFlagTag{}, block.get(), inst.Arg(0)); | ||||
|                 break; | ||||
|             case IR::Opcode::SetCFlag: | ||||
|                 pass.WriteVariable(CarryFlagTag{}, block.get(), inst.Arg(0)); | ||||
|                 break; | ||||
|             case IR::Opcode::SetOFlag: | ||||
|                 pass.WriteVariable(OverflowFlagTag{}, block.get(), inst.Arg(0)); | ||||
|                 break; | ||||
|             case IR::Opcode::GetRegister: | ||||
|                 if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { | ||||
|                     inst.ReplaceUsesWith(pass.ReadVariable(reg, block.get())); | ||||
|  | @ -145,6 +185,18 @@ void SsaRewritePass(IR::Function& function) { | |||
|                     inst.ReplaceUsesWith(pass.ReadVariable(pred, block.get())); | ||||
|                 } | ||||
|                 break; | ||||
|             case IR::Opcode::GetZFlag: | ||||
|                 inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block.get())); | ||||
|                 break; | ||||
|             case IR::Opcode::GetSFlag: | ||||
|                 inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block.get())); | ||||
|                 break; | ||||
|             case IR::Opcode::GetCFlag: | ||||
|                 inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block.get())); | ||||
|                 break; | ||||
|             case IR::Opcode::GetOFlag: | ||||
|                 inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block.get())); | ||||
|                 break; | ||||
|             default: | ||||
|                 break; | ||||
|             } | ||||
|  |  | |||
|  | @ -11,40 +11,44 @@ | |||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| 
 | ||||
| static void ValidateTypes(const IR::Block& block) { | ||||
|     for (const IR::Inst& inst : block) { | ||||
|         const size_t num_args{inst.NumArgs()}; | ||||
|         for (size_t i = 0; i < num_args; ++i) { | ||||
|             const IR::Type t1{inst.Arg(i).Type()}; | ||||
|             const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; | ||||
|             if (!IR::AreTypesCompatible(t1, t2)) { | ||||
|                 throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(block)); | ||||
| static void ValidateTypes(const IR::Function& function) { | ||||
|     for (const auto& block : function.blocks) { | ||||
|         for (const IR::Inst& inst : *block) { | ||||
|             const size_t num_args{inst.NumArgs()}; | ||||
|             for (size_t i = 0; i < num_args; ++i) { | ||||
|                 const IR::Type t1{inst.Arg(i).Type()}; | ||||
|                 const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; | ||||
|                 if (!IR::AreTypesCompatible(t1, t2)) { | ||||
|                     throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void ValidateUses(const IR::Block& block) { | ||||
| static void ValidateUses(const IR::Function& function) { | ||||
|     std::map<IR::Inst*, int> actual_uses; | ||||
|     for (const IR::Inst& inst : block) { | ||||
|         const size_t num_args{inst.NumArgs()}; | ||||
|         for (size_t i = 0; i < num_args; ++i) { | ||||
|             const IR::Value arg{inst.Arg(i)}; | ||||
|             if (!arg.IsImmediate()) { | ||||
|                 ++actual_uses[arg.Inst()]; | ||||
|     for (const auto& block : function.blocks) { | ||||
|         for (const IR::Inst& inst : *block) { | ||||
|             const size_t num_args{inst.NumArgs()}; | ||||
|             for (size_t i = 0; i < num_args; ++i) { | ||||
|                 const IR::Value arg{inst.Arg(i)}; | ||||
|                 if (!arg.IsImmediate()) { | ||||
|                     ++actual_uses[arg.Inst()]; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     for (const auto [inst, uses] : actual_uses) { | ||||
|         if (inst->UseCount() != uses) { | ||||
|             throw LogicError("Invalid uses in block:\n{}", IR::DumpBlock(block)); | ||||
|             throw LogicError("Invalid uses in block:" /*, IR::DumpFunction(function)*/); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void VerificationPass(const IR::Block& block) { | ||||
|     ValidateTypes(block); | ||||
|     ValidateUses(block); | ||||
| void VerificationPass(const IR::Function& function) { | ||||
|     ValidateTypes(function); | ||||
|     ValidateUses(function); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp