From 5aea560b9e73994ffe64d0c621650333e62be7b1 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 01/47] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 32 files changed, 1808 insertions(+), 2299 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 
src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. 
+ +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. ### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. 
`UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 
1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... +}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? 
u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. - code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + 
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + 
code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index efae44d917..f28a5ec8cb 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + 
frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - 
frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic @@ -211,29 +189,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - 
backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -277,25 +247,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -324,21 +286,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" 
IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- 
a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" 
+#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - 
Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - 
Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. 
- void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. 
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) 
{ - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if 
(info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = 
inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), 
[type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, 
inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || 
WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), 
IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = 
IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: 
{ - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - 
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. 
imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 
0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 
1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. 
- if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - 
case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case 
Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - 
FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. 
- for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, 
std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - 
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const 
IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? 
ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- 
a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); 
+ } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed 
again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = 
{}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. 
+ A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index 
* 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << 
static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + 
case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, 
inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? 
IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = 
rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. 
imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void 
FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. 
+ if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + 
FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), 
inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); 
+ break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. 
+ for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = 
ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, 
ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? 
ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = 
inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } 
+ Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + 
A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) 
target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . 
../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . 
../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From 399a03a130733167375e5e7487fa61fdc1cdd609 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 02/47] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From 85a3b5fa1bd6b4a8e6c3b6a94364bf4499b49546 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 03/47] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..df1619d552 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From de798b56f6e85c96f232314381908283ae56bb5e Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 04/47] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..014c121be5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ 
b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index df1619d552..160c2ef098 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 80652984676c4dc3b75d7a81e97ea314ba7e2b1e Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 05/47] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 
++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index f28a5ec8cb..3e25925a54 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -371,7 +371,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + 
target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . 
../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From a8cecd84674b9eaecf26957c6687fbae366f97eb Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 06/47] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From 8c57b97eb87eaaf6d33da3242c4096cb62f2f097 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 07/47] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h 
b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. 
template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) 
const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
+ template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From d4ff9e17b5bce32be329c56c203e51c50a75c84a Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 08/47] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 21 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 62 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,23 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t 
RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. 
const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include 
"./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. 
- std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. 
opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 6706c4cb56549918acfc8e949b11f948f63ec17c Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 09/47] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. 
+/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. 
+ inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include 
"dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include 
"dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- 
a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - 
Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From 02bec3ddb95b0321577a4dce9ec2c7ffdd521beb Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 10/47] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From bd8ed48d11520bd5151c7ef87b1cd25772b2ed0b Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 11/47] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 
instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? 
std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr 
= is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return 
*A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From 36e775ccafa650c06252dd6beed4286ce6b7cd5c Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 12/47] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +-- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 130 ++---------------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++++++ 7 files changed, 157 insertions(+), 137 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ 
-33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- 
a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c 
+++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index 7695df57d2..ef28d99279 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -8,19 +8,6 @@ #include "dynarmic/backend/exception_handler.h" -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - #include #include #include @@ -29,16 +16,17 @@ #include #include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -131,139 +119,51 @@ SigHandler::~SigHandler() { void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { std::lock_guard guard(code_block_infos_mutex); - if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) { + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) code_block_infos.erase(iter); - } code_block_infos.push_back(cbi); } void SigHandler::RemoveCodeBlock(u64 host_pc) { std::lock_guard guard(code_block_infos_mutex); const 
auto iter = FindCodeBlockInfo(host_pc); - if (iter == code_block_infos.end()) { - return; - } - code_block_infos.erase(iter); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); } void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { ASSERT(sig == SIGSEGV || sig == SIGBUS); - -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif - -#if defined(MCL_ARCHITECTURE_X86_64) - -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif - + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_RIP); - CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; - return; } } - fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_RIP); - -#elif defined(MCL_ARCHITECTURE_ARM64) - -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif - +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_PC); - CTX_PC = fc.call_pc; - return; } } - fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_PC); - -#elif defined(MCL_ARCHITECTURE_RISCV) - +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); - #else - # error "Invalid architecture" - #endif struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus; @@ -309,19 +209,19 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file 
mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif defined(ARCHITECTURE_arm64) +# if defined(__APPLE__) +# define CTX_PC (mctx->__ss.__pc) +# define CTX_SP (mctx->__ss.__sp) +# define CTX_LR (mctx->__ss.__lr) 
+# define CTX_PSTATE (mctx->__ss.__cpsr) +# define CTX_X(i) (mctx->__ss.__x[i]) +# define CTX_Q(i) (mctx->__ns.__v[i]) +# define CTX_FPSR (mctx->__ns.__fpsr) +# define CTX_FPCR (mctx->__ns.__fpcr) +# elif defined(__linux__) +# define CTX_PC (mctx.pc) +# define CTX_SP (mctx.sp) +# define CTX_LR (mctx.regs[30]) +# define CTX_PSTATE (mctx.pstate) +# define CTX_X(i) (mctx.regs[i]) +# define CTX_Q(i) (fpctx->vregs[i]) +# define CTX_FPSR (fpctx->fpsr) +# define CTX_FPCR (fpctx->fpcr) +# elif defined(__FreeBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__NetBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__OpenBSD__) +# define CTX_PC (ucontext->sc_elr) +# define CTX_SP (ucontext->sc_sp) +# define CTX_LR (ucontext->sc_lr) +# define CTX_X(i) (ucontext->sc_x[i]) +# define CTX_Q(i) (ucontext->sc_q[i]) +# else +# error "Unknown platform" +# endif +#else +# error "unimplemented" +#endif + +#ifdef ARCHITECTURE_arm64 +#ifdef __APPLE__ +inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { + return &(host_ctx->__ns); +} +#elif defined(__linux__) +inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast(header) + header->size); + return reinterpret_cast(header); +} +#endif +#endif From 1a8c9a7972a4824e1e6a8d31c161ad3b7fdacc04 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 17:03:36 +0000 Subject: [PATCH 13/47] [dynarmic] fix android Signed-off-by: lizzie --- 
src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index ef28d99279..b22befaff9 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -149,7 +149,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) - CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); From 60ac09d2cfc22d832b641f9a9af9defd97ba1a28 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 02:47:04 +0000 Subject: [PATCH 14/47] [dynarmic, cmake] remove unused frontends var Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 0065b1cf7f..a69818e8e5 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g.
cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) From c9b561134f50147c411304126e0a0cef8134ccaf Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 03:36:35 +0000 Subject: [PATCH 15/47] [dynarmic, docs] fastmem docs Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index 2a7c1a2583..c4f57996ba 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -1,5 +1,19 @@ # Fast memory (Fastmem) +The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amount of code space (push + call + pop), trash the i-cache (due to an indirect call) and overall make code emission more bloated. + +The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such a mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls; while this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throughput. + +Many kernels, however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only.
+ ![Host to guest translation](./HostToGuest.svg) ![Fastmem translation](./Fastmem.svg) + +In x86_64, for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler; the OS will then transform that into something that can be sent into userspace. + +Most modern OSes implement kernel-page-table-isolation, which means a set of system calls will invoke a context switch (not often used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, often used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs). + +Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: VDSO will take care of the entire thing in the same address space, making dispatching as costly as an indirect call - without the hazards of increased code size. + +The main downside from this is the constant i-cache trashing and pipeline hazards introduced by the VDSO signal handlers. However, on most benchmarks fastmem does perform faster than without (Linux only). This also abuses the fact of continuous address space emulation by using an arena - which can then be potentially transparently mapped into a hugepage, reducing TLB walk times.
From f1d527f9b0921cbba75ccf7b799742373475310f Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 16/47] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 ++- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 65 ++++++++----------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if 
(frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 383b915839..88fc34fa10 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -28,7 +28,6 @@ #include "dynarmic/ir/type.h" #include "mcl/bit/swap.hpp" #include "mcl/bit/rotate.hpp" -#include "mcl/iterator/reverse.hpp" namespace Dynarmic::Optimization { @@ -36,50 +35,42 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; 
} case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } @@ -1205,11 +1196,9 @@ static void ConstantPropagation(IR::Block& block) { static void DeadCodeElimination(IR::Block& block) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. 
- for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } + for (auto it = block.rbegin(); it != block.rend(); ++it) + if (!it->HasUses() && !MayHaveSideEffects(it->GetOpcode())) + it->Invalidate(); } static void IdentityRemovalPass(IR::Block& block) { From 56525d79d5038c07703f22b408509389c50ea81b Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 17/47] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From ff70516807befc5a478b61a17f5276f980469a74 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 18/47] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- 
.../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index e1b9e54df8..0a1e03ebbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const 
Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = xmm16; + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); From 86a82da22852662c0f5942cd7c99fa58d34f9caf Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 10:33:12 +0000 Subject: [PATCH 19/47] [dynarmic] unconditional branches always take Signed-off-by: lizzie --- src/dynarmic/docs/Design.md | 49 +++++++++++++------ 
src/dynarmic/docs/RegisterAllocator.md | 3 ++ .../dynarmic/backend/x64/block_of_code.cpp | 4 +- .../src/dynarmic/backend/x64/reg_alloc.cpp | 7 +-- .../A64/translate/impl/a64_branch.cpp | 9 ++-- 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/dynarmic/docs/Design.md b/src/dynarmic/docs/Design.md index 3c0deb5972..ffa8ccecdb 100644 --- a/src/dynarmic/docs/Design.md +++ b/src/dynarmic/docs/Design.md @@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR) ### Callback: {Read,Write}Memory{8,16,32,64} - ReadMemory8( vaddr) - ReadMemory16( vaddr) - ReadMemory32( vaddr) - ReadMemory64( vaddr) - WriteMemory8( vaddr, value_to_store) - WriteMemory16( vaddr, value_to_store) - WriteMemory32( vaddr, value_to_store) - WriteMemory64( vaddr, value_to_store) +```c++ + ReadMemory8( vaddr) + ReadMemory16( vaddr) + ReadMemory32( vaddr) + ReadMemory64( vaddr) + WriteMemory8( vaddr, value_to_store) + WriteMemory16( vaddr, value_to_store) + WriteMemory32( vaddr, value_to_store) + WriteMemory64( vaddr, value_to_store) +``` Memory access. ### Terminal: Interpret - SetTerm(IR::Term::Interpret{next}) +```c++ +SetTerm(IR::Term::Interpret{next}) +``` This terminal instruction calls the interpreter, starting at `next`. The interpreter must interpret exactly one instruction. ### Terminal: ReturnToDispatch - SetTerm(IR::Term::ReturnToDispatch{}) +```c++ +SetTerm(IR::Term::ReturnToDispatch{}) +``` This terminal instruction returns control to the dispatcher. The dispatcher will use the value in R15 to determine what comes next. ### Terminal: LinkBlock - SetTerm(IR::Term::LinkBlock{next}) +```c++ +SetTerm(IR::Term::LinkBlock{next}) +``` This terminal instruction jumps to the basic block described by `next` if we have enough cycles remaining. If we do not have enough cycles remaining, we return to the dispatcher, which will return control to the host. 
+### Terminal: LinkBlockFast + +```c++ +SetTerm(IR::Term::LinkBlockFast{next}) +``` + +This terminal instruction jumps to the basic block described by `next` unconditionally. +This promises guarantees that must be held at runtime - i.e that the program wont hang, + ### Terminal: PopRSBHint - SetTerm(IR::Term::PopRSBHint{}) +```c++ +SetTerm(IR::Term::PopRSBHint{}) +``` This terminal instruction checks the top of the Return Stack Buffer against R15. If RSB lookup fails, control is returned to the dispatcher. This is an optimization for faster function calls. A backend that doesn't support this optimization or doesn't have a RSB may choose to implement this exactly as -ReturnToDispatch. +`ReturnToDispatch`. ### Terminal: If - SetTerm(IR::Term::If{cond, term_then, term_else}) +```c++ +SetTerm(IR::Term::If{cond, term_then, term_else}) +``` This terminal instruction conditionally executes one terminal or another depending on the run-time state of the ARM flags. diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index 5a7210c791..f5bbaaf168 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -19,6 +19,9 @@ The member functions on `RegAlloc` are just a combination of the above concepts. The following registers are reserved for internal use and should NOT participate in register allocation: - `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. - `%rsp`: Stack pointer. +- `%r15`: JIT pointer +- `%r14`: Page table pointer. +- `%r13`: Fastmem pointer. The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. 
diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 014c121be5..df71593b7f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp 
b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) + if (target == ir.PC()) { + ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + return false; + } + ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); return false; } From 615f2935aa613e5f0d1a54c801c4a37b68d26352 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 11:04:34 +0000 Subject: [PATCH 20/47] [dynarmic] Implement constant folding for CountLeadingZeros, add readXX constnat folding for A64 Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 48 +++++++++++++++------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 88fc34fa10..750a8a496b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -31,10 +31,11 @@ namespace Dynarmic::Optimization { -static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { +static void ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { + case IR::Opcode::A32ReadMemory8: + case IR::Opcode::A64ReadMemory8: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ 
-44,7 +45,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory16: { + case IR::Opcode::A32ReadMemory16: + case IR::Opcode::A64ReadMemory16: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -54,7 +56,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory32: { + case IR::Opcode::A32ReadMemory32: + case IR::Opcode::A64ReadMemory32: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -64,7 +67,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory64: { + case IR::Opcode::A32ReadMemory64: + case IR::Opcode::A64ReadMemory64: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -667,14 +671,14 @@ using Op = Dynarmic::IR::Opcode; // bit size all over the place within folding functions. static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + inst.ReplaceUsesWith(IR::Value{u32(value)}); } else { inst.ReplaceUsesWith(IR::Value{value}); } } static IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; + return is_32_bit ? IR::Value{u32(value)} : IR::Value{value}; } template @@ -800,6 +804,23 @@ static void FoldByteReverse(IR::Inst& inst, Op op) { } } +/// Folds leading zero population count +/// +/// 1. 
imm -> countl_zero(imm) +/// +static void FoldCountLeadingZeros(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + if (operand.IsImmediate()) { + if (is_32_bit) { + const u32 result = std::countl_zero(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = std::countl_zero(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } + } +} + /// Folds division operations based on the following: /// /// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) @@ -1020,8 +1041,7 @@ static void FoldZeroExtendXToLong(IR::Inst& inst) { static void ConstantPropagation(IR::Block& block) { for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - + auto const opcode = inst.GetOpcode(); switch (opcode) { case Op::LeastSignificantWord: FoldLeastSignificantWord(inst); @@ -1110,12 +1130,12 @@ static void ConstantPropagation(IR::Block& block) { break; case Op::ArithmeticShiftRightMasked32: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + ReplaceUsesWith(inst, true, s32(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); } break; case Op::ArithmeticShiftRightMasked64: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + ReplaceUsesWith(inst, false, s64(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); } break; case Op::RotateRightMasked32: @@ -1187,6 +1207,10 @@ static void ConstantPropagation(IR::Block& block) { case Op::ByteReverseDual: FoldByteReverse(inst, opcode); break; + case Op::CountLeadingZeros32: + case Op::CountLeadingZeros64: + FoldCountLeadingZeros(inst, opcode == Op::CountLeadingZeros32); + break; default: break; } @@ -1462,7 +1486,7 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: 
Optimization::DeadCodeElimination(block); } if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { - Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantMemoryReads(block, conf.callbacks); Optimization::ConstantPropagation(block); Optimization::DeadCodeElimination(block); } From 7b7588e9fbae11abfff15b28701d35e13ad2a440 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 14:46:35 +0000 Subject: [PATCH 21/47] [dynarmic] regalloc use scratchimpl that uses all instead of iteraiting Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 4 +-- .../src/dynarmic/backend/x64/reg_alloc.cpp | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1691bbb3b7..fa1f071fbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { auto const opcode = inst.GetOpcode(); // Call the relevant Emit* member function. switch (opcode) { -#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; #define A32OPC(name, type, ...) -#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; +#define A64OPC(name, type, ...) 
case IR::Opcode::A64##name: goto a64_branch; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index df71593b7f..b01727985a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, if (result_def) { DefineValueImpl(result_def, ABI_RETURN); } - + for (size_t i = 0; i < args.size(); i++) { + if (args[i]) { + UseScratch(*args[i], args_hostloc[i]); + } else { + ScratchGpr(args_hostloc[i]); // TODO: Force spill + } + } + // Must match with with ScratchImpl + for (auto const gpr : other_caller_save) { + MoveOutOfTheWay(gpr); + LocInfo(gpr).WriteLock(); + } for (size_t i = 0; i < args.size(); i++) { if (args[i] && !args[i]->get().IsVoid()) { - UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { @@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } } - - for (size_t i = 0; i < args.size(); i++) - if (!args[i]) { - // TODO: Force spill - ScratchGpr(args_hostloc[i]); - } - for (auto const caller_saved : other_caller_save) - ScratchImpl({caller_saved}); } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { @@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { } HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { -#if 0 // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // but it's fine anyways. We can find other ways to cheat it later - but which?!?! // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // TODO(lizzie): This needs to be investigated further later. 
// Do not spill XMM into other XMM silly - if (!is_xmm) { + /*if (!is_xmm) { // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // Intel recommends to spill GPR onto XMM registers IF POSSIBLE // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? @@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) return loc; - } -#endif + }*/ + // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits + // of spilling on XMM versus the potential cost of using XMM registers..... // Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From 6aaa579a98d3aacc2d58b9e33b105395b26cf5b3 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 22/47] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- src/dynarmic/tests/A32/fuzz_arm.cpp | 4 +-- 5 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index 3e25925a54..f6f224a685 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -61,7 +61,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp 
b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index a13baa6a97..e587c7cf3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git 
a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 5ee6d2bf02..bef473a491 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -47,7 +47,7 @@ using namespace Dynarmic; template bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { - return Common::VisitVariant(terminal, [&](auto t) -> bool { + return boost::apply_visitor([&](auto t) -> bool { using T = std::decay_t; if constexpr (std::is_same_v) { return false; @@ -73,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { ASSERT_MSG(false, "Invalid terminal type"); return false; } - }); + }, terminal); } bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) { From 9d2681ecc9565681db623fb71799e76381998512 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 20:47:49 +0200 Subject: [PATCH 23/47] [cmake] enable clang-cl and WoA builds (#348) Compilation and CMake fixes for both Windows on ARM and clang-cl, meaning Windows can now be built on both MSVC and clang on both amd64 and aarch64. 
Compiling on clang is *dramatically* faster so this should be useful for CI. Co-authored-by: crueter Co-authored-by: crueter Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/348 Reviewed-by: CamilleLaVey Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- .ci/windows/build.sh | 67 +++---- .patch/boost/0001-clang-cl.patch | 13 ++ .patch/boost/0002-use-marmasm.patch | 11 ++ .patch/boost/0003-armasm-options.patch | 14 ++ .patch/cpp-jwt/0001-no-install.patch | 47 ----- .patch/cpp-jwt/0002-missing-decl.patch | 13 -- .patch/discord-rpc/0001-cmake-version.patch | 10 - .patch/discord-rpc/0002-no-clang-format.patch | 40 ---- .patch/discord-rpc/0003-fix-cpp17.patch | 31 --- .patch/unordered-dense/0001-cmake.patch | 22 --- CMakeLists.txt | 179 ++++++++++++------ CMakeModules/DownloadExternals.cmake | 13 +- CMakeModules/GenerateSCMRev.cmake | 2 + CMakeModules/WindowsCopyFiles.cmake | 33 ++-- cpmfile.json | 36 ++-- externals/CMakeLists.txt | 23 ++- externals/cpmfile.json | 31 +-- externals/sse2neon/sse2neon.h | 22 +-- src/CMakeLists.txt | 39 ++-- .../apps/audio_renderer/audio_renderer.cpp | 2 +- src/audio_core/common/common.h | 4 +- src/audio_core/device/audio_buffers.h | 4 +- src/audio_core/renderer/audio_device.cpp | 4 +- .../renderer/behavior/behavior_info.cpp | 2 +- .../renderer/command/command_buffer.cpp | 2 +- .../renderer/command/command_generator.h | 6 +- .../renderer/command/data_source/decode.cpp | 18 +- .../renderer/command/effect/aux_.cpp | 4 +- .../renderer/command/effect/biquad_filter.cpp | 8 +- .../renderer/command/effect/capture.cpp | 2 +- .../renderer/command/effect/i3dl2_reverb.cpp | 6 +- .../renderer/command/effect/light_limiter.cpp | 8 +- .../renderer/command/effect/reverb.cpp | 8 +- .../command/mix/depop_for_mix_buffers.cpp | 2 +- .../renderer/command/resample/upsample.cpp | 2 +- .../renderer/command/sink/circular_buffer.cpp | 4 +- .../renderer/command/sink/device.cpp | 4 +- src/audio_core/renderer/mix/mix_context.cpp | 2 +- 
src/audio_core/renderer/sink/sink_info_base.h | 4 +- .../renderer/splitter/splitter_context.cpp | 2 +- src/audio_core/renderer/system.cpp | 2 +- src/audio_core/sink/cubeb_sink.cpp | 6 +- src/audio_core/sink/sink_stream.cpp | 8 +- src/common/CMakeLists.txt | 4 +- src/common/free_region_manager.h | 4 +- src/common/fs/path_util.cpp | 6 +- src/common/heap_tracker.cpp | 7 +- src/common/host_memory.cpp | 12 +- src/common/logging/log.h | 2 +- src/common/math_util.h | 8 +- src/common/overflow.h | 4 +- src/common/range_map.h | 4 +- src/common/range_sets.inc | 2 +- src/common/ring_buffer.h | 12 +- src/common/scm_rev.cpp.in | 76 ++------ src/common/scm_rev.h | 10 +- src/common/settings.h | 8 +- src/common/settings_setting.h | 6 +- src/common/slot_vector.h | 2 +- src/common/thread.cpp | 4 +- src/common/tiny_mt.h | 2 +- src/common/uint128.h | 6 +- src/common/x64/cpu_wait.cpp | 2 +- src/core/CMakeLists.txt | 1 + src/core/arm/debug.cpp | 4 +- src/core/arm/dynarmic/dynarmic_cp15.cpp | 4 + src/core/arm/nce/interpreter_visitor.h | 5 + src/core/arm/nce/patcher.cpp | 2 + src/core/core.cpp | 2 +- src/core/crypto/xts_encryption_layer.cpp | 4 +- src/core/debugger/gdbstub.cpp | 4 +- src/core/debugger/gdbstub_arch.cpp | 2 +- src/core/device_memory_manager.inc | 2 +- src/core/file_sys/fs_path_utility.h | 2 +- src/core/file_sys/fsa/fs_i_directory.h | 2 +- src/core/file_sys/fsa/fs_i_file.h | 2 +- ...ystem_aes_ctr_counter_extended_storage.cpp | 2 +- .../fssystem/fssystem_aes_ctr_storage.cpp | 2 +- .../fssystem/fssystem_aes_xts_storage.cpp | 6 +- ...system_alignment_matching_storage_impl.cpp | 4 +- .../fssystem_crypto_configuration.cpp | 4 +- ...ssystem_integrity_verification_storage.cpp | 2 +- .../file_sys/fssystem/fssystem_nca_header.cpp | 2 +- .../fssystem/fssystem_pooled_buffer.cpp | 2 +- .../fssystem/fssystem_sparse_storage.h | 4 +- src/core/file_sys/nca_metadata.cpp | 2 +- src/core/file_sys/registered_cache.cpp | 2 +- src/core/file_sys/romfs.cpp | 2 +- src/core/file_sys/vfs/vfs.cpp | 4 +- 
src/core/file_sys/vfs/vfs_static.h | 4 +- src/core/file_sys/vfs/vfs_vector.cpp | 4 +- src/core/file_sys/vfs/vfs_vector.h | 2 +- src/core/frontend/emu_window.cpp | 8 +- src/core/frontend/framebuffer_layout.cpp | 2 +- .../board/nintendo/nx/k_memory_layout.cpp | 2 +- .../board/nintendo/nx/k_system_control.cpp | 6 +- src/core/hle/kernel/k_dynamic_page_manager.h | 4 +- src/core/hle/kernel/k_handle_table.h | 2 +- src/core/hle/kernel/k_hardware_timer.cpp | 8 +- src/core/hle/kernel/k_hardware_timer.h | 2 +- .../hle/kernel/k_light_server_session.cpp | 2 +- src/core/hle/kernel/k_light_server_session.h | 2 +- src/core/hle/kernel/k_memory_block.h | 2 +- src/core/hle/kernel/k_memory_layout.cpp | 4 +- src/core/hle/kernel/k_memory_manager.cpp | 6 +- src/core/hle/kernel/k_memory_manager.h | 6 +- src/core/hle/kernel/k_memory_region.h | 2 +- src/core/hle/kernel/k_page_bitmap.h | 2 +- src/core/hle/kernel/k_page_heap.h | 2 +- src/core/hle/kernel/k_page_table_base.cpp | 14 +- src/core/hle/kernel/k_process.h | 2 +- src/core/hle/kernel/k_resource_limit.cpp | 2 +- src/core/hle/kernel/k_slab_heap.h | 2 +- src/core/hle/kernel/k_thread.cpp | 2 +- src/core/hle/kernel/kernel.cpp | 2 +- .../hle/kernel/svc/svc_address_arbiter.cpp | 4 +- .../hle/kernel/svc/svc_condition_variable.cpp | 4 +- src/core/hle/kernel/svc/svc_ipc.cpp | 4 +- src/core/hle/kernel/svc/svc_process.cpp | 2 +- src/core/hle/kernel/svc/svc_thread.cpp | 6 +- src/core/hle/service/acc/acc.cpp | 2 +- .../service/am/frontend/applet_cabinet.cpp | 2 +- .../service/am/frontend/applet_controller.cpp | 2 +- .../am/service/application_accessor.cpp | 2 +- .../am/service/application_functions.cpp | 4 +- .../service/library_applet_self_accessor.cpp | 2 +- src/core/hle/service/bcat/bcat_service.cpp | 2 +- .../bcat/delivery_cache_directory_service.cpp | 4 +- .../bcat/delivery_cache_storage_service.cpp | 2 +- src/core/hle/service/cmif_serialization.h | 2 +- src/core/hle/service/es/es.cpp | 4 +- .../fsp/fs_i_save_data_info_reader.cpp | 2 +- 
src/core/hle/service/glue/notif.cpp | 4 +- src/core/hle/service/glue/time/manager.cpp | 6 +- src/core/hle/service/hid/hid_debug_server.cpp | 2 +- src/core/hle/service/jit/jit_context.cpp | 2 +- src/core/hle/service/ldn/ldn_types.h | 2 +- src/core/hle/service/lm/lm.cpp | 2 +- src/core/hle/service/nfc/common/device.cpp | 2 +- src/core/hle/service/nifm/nifm.cpp | 4 +- .../ns/application_manager_interface.cpp | 4 +- .../service/ns/platform_service_manager.cpp | 2 +- .../service/nvdrv/devices/nvhost_as_gpu.cpp | 2 +- .../hle/service/nvdrv/devices/nvhost_as_gpu.h | 4 +- .../nvdrv/devices/nvhost_nvdec_common.cpp | 4 +- .../nvnflinger/buffer_queue_consumer.cpp | 2 +- .../service/nvnflinger/buffer_queue_core.cpp | 2 +- .../nvnflinger/buffer_queue_producer.cpp | 4 +- .../service/nvnflinger/hardware_composer.cpp | 2 +- src/core/hle/service/psc/time/common.h | 10 +- .../psc/time/power_state_request_manager.cpp | 2 +- src/core/hle/service/set/settings_server.cpp | 10 +- .../service/set/system_settings_server.cpp | 4 +- src/core/hle/service/sm/sm_controller.cpp | 8 +- src/core/hle/service/sockets/bsd.cpp | 4 +- src/core/hle/service/spl/spl_module.cpp | 2 +- src/core/hle/service/ssl/ssl.cpp | 2 +- .../vi/application_display_service.cpp | 4 +- src/core/internal_network/network.cpp | 8 +- .../internal_network/network_interface.cpp | 2 +- src/core/internal_network/socket_proxy.cpp | 6 +- src/core/loader/nca.cpp | 2 +- src/core/memory.cpp | 8 +- src/core/tools/renderdoc.cpp | 13 +- src/dynarmic/CMakeLists.txt | 6 +- .../src/dynarmic/backend/x64/emit_x64.cpp | 2 +- .../backend/x64/emit_x64_saturation.cpp | 4 +- .../dynarmic/backend/x64/emit_x64_vector.cpp | 34 ++-- .../x64/emit_x64_vector_floating_point.cpp | 2 +- .../src/dynarmic/backend/x64/reg_alloc.cpp | 8 +- .../src/dynarmic/backend/x64/reg_alloc.h | 6 +- .../src/dynarmic/frontend/A32/a32_types.h | 1 + src/hid_core/frontend/emulated_controller.cpp | 4 +- src/hid_core/hidbus/ringcon.cpp | 2 +- 
.../irsensor/image_transfer_processor.cpp | 2 +- .../abstract_battery_handler.cpp | 2 +- .../abstract_button_handler.cpp | 2 +- .../abstract_ir_sensor_handler.cpp | 2 +- .../abstracted_pad/abstract_led_handler.cpp | 2 +- .../abstracted_pad/abstract_mcu_handler.cpp | 2 +- .../abstracted_pad/abstract_nfc_handler.cpp | 2 +- .../resources/abstracted_pad/abstract_pad.cpp | 2 +- .../abstracted_pad/abstract_pad_holder.cpp | 2 +- .../abstracted_pad/abstract_palma_handler.cpp | 2 +- .../abstract_properties_handler.cpp | 2 +- .../abstract_sixaxis_handler.cpp | 2 +- .../abstract_vibration_handler.cpp | 2 +- src/hid_core/resources/applet_resource.cpp | 2 +- src/hid_core/resources/npad/npad.cpp | 2 +- src/hid_core/resources/npad/npad_data.cpp | 2 +- src/hid_core/resources/npad/npad_resource.cpp | 2 +- src/hid_core/resources/palma/palma.cpp | 2 +- .../touch_screen/gesture_handler.cpp | 2 +- .../touch_screen/touch_screen_resource.cpp | 8 +- src/input_common/drivers/mouse.cpp | 4 +- src/input_common/drivers/sdl_driver.cpp | 4 +- src/input_common/drivers/udp_client.cpp | 4 +- .../helpers/joycon_protocol/calibration.cpp | 4 +- .../helpers/joycon_protocol/nfc.cpp | 12 +- .../helpers/joycon_protocol/rumble.cpp | 2 +- src/network/room.cpp | 2 +- .../backend/glasm/reg_alloc.cpp | 2 +- .../backend/glsl/emit_glsl_integer.cpp | 2 +- .../backend/glsl/glsl_emit_context.cpp | 6 +- .../backend/glsl/var_alloc.cpp | 2 +- .../spirv/emit_spirv_context_get_set.cpp | 2 +- .../backend/spirv/emit_spirv_integer.cpp | 2 +- .../backend/spirv/spirv_emit_context.cpp | 2 +- .../frontend/maxwell/decode.cpp | 4 +- .../floating_point_conversion_integer.cpp | 24 +-- .../integer_floating_point_conversion.cpp | 4 +- .../frontend/maxwell/translate_program.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 2 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 4 +- src/video_core/CMakeLists.txt | 2 +- src/video_core/buffer_cache/buffer_cache.h | 28 +-- 
.../buffer_cache/memory_tracker_base.h | 6 +- src/video_core/buffer_cache/word_manager.h | 12 +- src/video_core/control/channel_state_cache.h | 2 +- src/video_core/engines/engine_interface.h | 2 +- src/video_core/engines/engine_upload.cpp | 4 +- src/video_core/engines/maxwell_3d.cpp | 12 +- src/video_core/engines/maxwell_3d.h | 6 +- src/video_core/engines/maxwell_dma.cpp | 6 +- .../engines/sw_blitter/converter.cpp | 2 +- src/video_core/host1x/codecs/h264.cpp | 2 +- src/video_core/host1x/codecs/vp9.cpp | 4 +- src/video_core/host1x/vic.cpp | 38 ++-- src/video_core/macro/macro_hle.cpp | 2 +- src/video_core/memory_manager.cpp | 4 +- src/video_core/memory_manager.h | 2 +- src/video_core/renderer_opengl/blit_image.cpp | 4 +- .../renderer_opengl/gl_buffer_cache.cpp | 2 +- .../renderer_opengl/gl_buffer_cache.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 2 +- .../gl_staging_buffer_pool.cpp | 6 +- .../renderer_opengl/gl_state_tracker.h | 2 +- .../renderer_opengl/gl_texture_cache.cpp | 18 +- src/video_core/renderer_vulkan/blit_image.cpp | 4 +- .../renderer_vulkan/vk_buffer_cache.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_present_manager.cpp | 4 +- .../renderer_vulkan/vk_query_cache.cpp | 20 +- .../renderer_vulkan/vk_rasterizer.cpp | 16 +- .../vk_staging_buffer_pool.cpp | 14 +- .../renderer_vulkan/vk_state_tracker.h | 2 +- .../renderer_vulkan/vk_swapchain.cpp | 16 +- .../renderer_vulkan/vk_texture_cache.cpp | 50 ++--- src/video_core/shader_environment.cpp | 6 +- src/video_core/shader_environment.h | 4 +- src/video_core/texture_cache/decode_bc.cpp | 4 +- src/video_core/texture_cache/image_base.cpp | 10 +- .../texture_cache/image_view_base.cpp | 6 +- .../texture_cache/image_view_info.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 24 +-- .../texture_cache/texture_cache_base.h | 2 +- src/video_core/texture_cache/util.cpp | 14 +- src/video_core/textures/astc.cpp | 10 +- 
src/video_core/textures/decoders.cpp | 8 +- src/video_core/textures/texture.cpp | 2 +- src/video_core/textures/workers.cpp | 2 +- src/video_core/transform_feedback.cpp | 4 +- .../vulkan_common/vulkan_device.cpp | 4 +- .../vulkan_common/vulkan_memory_allocator.cpp | 4 +- src/video_core/vulkan_common/vulkan_wrapper.h | 4 +- src/yuzu/CMakeLists.txt | 2 +- src/yuzu/about_dialog.cpp | 9 +- src/yuzu/bootmanager.cpp | 4 +- .../configure_touch_from_button.cpp | 4 +- src/yuzu/game_list.cpp | 2 +- src/yuzu/main.cpp | 29 +-- src/yuzu/play_time_manager.cpp | 2 +- src/yuzu/util/util.cpp | 2 +- tools/cpm-fetch.sh | 12 +- 276 files changed, 973 insertions(+), 1010 deletions(-) create mode 100644 .patch/boost/0001-clang-cl.patch create mode 100644 .patch/boost/0002-use-marmasm.patch create mode 100644 .patch/boost/0003-armasm-options.patch delete mode 100644 .patch/cpp-jwt/0001-no-install.patch delete mode 100644 .patch/cpp-jwt/0002-missing-decl.patch delete mode 100644 .patch/discord-rpc/0001-cmake-version.patch delete mode 100644 .patch/discord-rpc/0002-no-clang-format.patch delete mode 100644 .patch/discord-rpc/0003-fix-cpp17.patch delete mode 100644 .patch/unordered-dense/0001-cmake.patch diff --git a/.ci/windows/build.sh b/.ci/windows/build.sh index 681f327793..a0ab69a440 100644 --- a/.ci/windows/build.sh +++ b/.ci/windows/build.sh @@ -1,59 +1,45 @@ -#!/bin/bash -e +#!/bin/bash -ex -# SPDX-FileCopyrightText: 2025 eden Emulator Project +# SPDX-FileCopyrightText: 2025 Eden Emulator Project # SPDX-License-Identifier: GPL-3.0-or-later -if [ "$DEVEL" != "true" ]; then - export EXTRA_CMAKE_FLAGS=("${EXTRA_CMAKE_FLAGS[@]}" -DENABLE_QT_UPDATE_CHECKER=ON) +if [ "$COMPILER" == "clang" ] +then + EXTRA_CMAKE_FLAGS+=( + -DCMAKE_CXX_COMPILER=clang-cl + -DCMAKE_C_COMPILER=clang-cl + -DCMAKE_CXX_FLAGS="-O3" + -DCMAKE_C_FLAGS="-O3" + ) + + BUILD_TYPE="RelWithDebInfo" fi -if [ "$CCACHE" = "true" ]; then - export EXTRA_CMAKE_FLAGS=("${EXTRA_CMAKE_FLAGS[@]}" -DUSE_CCACHE=ON) -fi +[ -z 
"$WINDEPLOYQT" ] && { echo "WINDEPLOYQT environment variable required."; exit 1; } -if [ "$BUNDLE_QT" = "true" ]; then - export EXTRA_CMAKE_FLAGS=("${EXTRA_CMAKE_FLAGS[@]}" -DYUZU_USE_BUNDLED_QT=ON) -else - export EXTRA_CMAKE_FLAGS=("${EXTRA_CMAKE_FLAGS[@]}" -DYUZU_USE_BUNDLED_QT=OFF) -fi - -if [ -z "$BUILD_TYPE" ]; then - export BUILD_TYPE="Release" -fi - -if [ "$WINDEPLOYQT" == "" ]; then - echo "You must supply the WINDEPLOYQT environment variable." - exit 1 -fi - -if [ "$USE_WEBENGINE" = "true" ]; then - WEBENGINE=ON -else - WEBENGINE=OFF -fi - -if [ "$USE_MULTIMEDIA" = "false" ]; then - MULTIMEDIA=OFF -else - MULTIMEDIA=ON -fi - -export EXTRA_CMAKE_FLAGS=("${EXTRA_CMAKE_FLAGS[@]}" $@) +echo $EXTRA_CMAKE_FLAGS mkdir -p build && cd build cmake .. -G Ninja \ - -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \ - -DENABLE_QT_TRANSLATION=ON \ + -DCMAKE_BUILD_TYPE="${BUILD_TYPE:-Release}" \ + -DENABLE_QT_TRANSLATION=ON \ -DUSE_DISCORD_PRESENCE=ON \ -DYUZU_USE_BUNDLED_SDL2=ON \ + -DBUILD_TESTING=OFF \ -DYUZU_TESTS=OFF \ + -DDYNARMIC_TESTS=OFF \ -DYUZU_CMD=OFF \ -DYUZU_ROOM_STANDALONE=OFF \ - -DYUZU_USE_QT_MULTIMEDIA=$MULTIMEDIA \ - -DYUZU_USE_QT_WEB_ENGINE=$WEBENGINE \ + -DYUZU_USE_QT_MULTIMEDIA=${USE_MULTIMEDIA:-false} \ + -DYUZU_USE_QT_WEB_ENGINE=${USE_WEBENGINE:-false} \ -DYUZU_ENABLE_LTO=ON \ + -DCMAKE_EXE_LINKER_FLAGS=" /LTCG" \ -DDYNARMIC_ENABLE_LTO=ON \ - "${EXTRA_CMAKE_FLAGS[@]}" + -DYUZU_USE_BUNDLED_QT=${BUNDLE_QT:-false} \ + -DUSE_CCACHE=${CCACHE:-false} \ + -DENABLE_QT_UPDATE_CHECKER=${DEVEL:-true} \ + "${EXTRA_CMAKE_FLAGS[@]}" \ + "$@" ninja @@ -62,4 +48,5 @@ rm -f bin/*.pdb set -e $WINDEPLOYQT --release --no-compiler-runtime --no-opengl-sw --no-system-dxc-compiler --no-system-d3d-compiler --dir pkg bin/eden.exe + cp bin/* pkg diff --git a/.patch/boost/0001-clang-cl.patch b/.patch/boost/0001-clang-cl.patch new file mode 100644 index 0000000000..cdabc712cb --- /dev/null +++ b/.patch/boost/0001-clang-cl.patch @@ -0,0 +1,13 @@ +diff --git 
a/libs/cobalt/include/boost/cobalt/concepts.hpp b/libs/cobalt/include/boost/cobalt/concepts.hpp +index d49f2ec..a9bdb80 100644 +--- a/libs/cobalt/include/boost/cobalt/concepts.hpp ++++ b/libs/cobalt/include/boost/cobalt/concepts.hpp +@@ -62,7 +62,7 @@ struct enable_awaitables + template + concept with_get_executor = requires (T& t) + { +- {t.get_executor()} -> asio::execution::executor; ++ t.get_executor(); + }; + + diff --git a/.patch/boost/0002-use-marmasm.patch b/.patch/boost/0002-use-marmasm.patch new file mode 100644 index 0000000000..10f490b878 --- /dev/null +++ b/.patch/boost/0002-use-marmasm.patch @@ -0,0 +1,11 @@ +--- a/libs/context/CMakeLists.txt 2025-09-08 00:42:31.303651800 -0400 ++++ b/libs/context/CMakeLists.txt 2025-09-08 00:42:40.592184300 -0400 +@@ -146,7 +146,7 @@ + set(ASM_LANGUAGE ASM) + endif() + elseif(BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) +- set(ASM_LANGUAGE ASM_ARMASM) ++ set(ASM_LANGUAGE ASM_MARMASM) + else() + set(ASM_LANGUAGE ASM_MASM) + endif() diff --git a/.patch/boost/0003-armasm-options.patch b/.patch/boost/0003-armasm-options.patch new file mode 100644 index 0000000000..3869f95f6f --- /dev/null +++ b/.patch/boost/0003-armasm-options.patch @@ -0,0 +1,14 @@ +diff --git a/libs/context/CMakeLists.txt b/libs/context/CMakeLists.txt +index 8210f65..0e59dd7 100644 +--- a/libs/context/CMakeLists.txt ++++ b/libs/context/CMakeLists.txt +@@ -186,7 +186,8 @@ if(BOOST_CONTEXT_IMPLEMENTATION STREQUAL "fcontext") + set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "/safeseh") + endif() + +- else() # masm ++ # armasm doesn't support most of these options ++ elseif(NOT BOOST_CONTEXT_ASSEMBLER STREQUAL armasm) # masm + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_property(SOURCE ${ASM_SOURCES} APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") diff --git a/.patch/cpp-jwt/0001-no-install.patch b/.patch/cpp-jwt/0001-no-install.patch deleted file mode 100644 index 
b5be557a53..0000000000 --- a/.patch/cpp-jwt/0001-no-install.patch +++ /dev/null @@ -1,47 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 8c1761f..52c4ca4 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -69,42 +69,3 @@ endif() - if(CPP_JWT_BUILD_EXAMPLES) - add_subdirectory(examples) - endif() -- --# ############################################################################## --# INSTALL --# ############################################################################## -- --include(GNUInstallDirs) --include(CMakePackageConfigHelpers) --set(CPP_JWT_CONFIG_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/cmake/${PROJECT_NAME}) -- --install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Targets) --install( -- EXPORT ${PROJECT_NAME}Targets -- DESTINATION ${CPP_JWT_CONFIG_INSTALL_DIR} -- NAMESPACE ${PROJECT_NAME}:: -- COMPONENT dev) --configure_package_config_file(cmake/Config.cmake.in ${PROJECT_NAME}Config.cmake -- INSTALL_DESTINATION ${CPP_JWT_CONFIG_INSTALL_DIR} -- NO_SET_AND_CHECK_MACRO) --write_basic_package_version_file(${PROJECT_NAME}ConfigVersion.cmake -- COMPATIBILITY SameMajorVersion -- ARCH_INDEPENDENT) --install( -- FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake -- ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake -- DESTINATION ${CPP_JWT_CONFIG_INSTALL_DIR} -- COMPONENT dev) -- --if(NOT CPP_JWT_USE_VENDORED_NLOHMANN_JSON) -- set(CPP_JWT_VENDORED_NLOHMANN_JSON_INSTALL_PATTERN PATTERN "json" EXCLUDE) --endif() --install( -- DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/jwt/ -- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/jwt -- COMPONENT dev -- FILES_MATCHING -- PATTERN "*.hpp" -- PATTERN "*.ipp" -- PATTERN "test" EXCLUDE -- ${CPP_JWT_VENDORED_NLOHMANN_JSON_INSTALL_PATTERN}) diff --git a/.patch/cpp-jwt/0002-missing-decl.patch b/.patch/cpp-jwt/0002-missing-decl.patch deleted file mode 100644 index cd5175dbe0..0000000000 --- a/.patch/cpp-jwt/0002-missing-decl.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git 
a/include/jwt/algorithm.hpp b/include/jwt/algorithm.hpp -index 0e3b843..1156e6a 100644 ---- a/include/jwt/algorithm.hpp -+++ b/include/jwt/algorithm.hpp -@@ -64,6 +64,8 @@ using verify_func_t = verify_result_t (*) (const jwt::string_view key, - const jwt::string_view head, - const jwt::string_view jwt_sign); - -+verify_result_t is_secret_a_public_key(const jwt::string_view secret); -+ - namespace algo { - - //Me: TODO: All these can be done using code generaion. diff --git a/.patch/discord-rpc/0001-cmake-version.patch b/.patch/discord-rpc/0001-cmake-version.patch deleted file mode 100644 index 6a1609fadf..0000000000 --- a/.patch/discord-rpc/0001-cmake-version.patch +++ /dev/null @@ -1,10 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 5dad9e9..760a1b2 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -1,4 +1,4 @@ --cmake_minimum_required (VERSION 3.2.0) -+cmake_minimum_required (VERSION 3.10) - project (DiscordRPC) - - include(GNUInstallDirs) diff --git a/.patch/discord-rpc/0002-no-clang-format.patch b/.patch/discord-rpc/0002-no-clang-format.patch deleted file mode 100644 index 4b1e37c29f..0000000000 --- a/.patch/discord-rpc/0002-no-clang-format.patch +++ /dev/null @@ -1,40 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 760a1b2..540d643 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -12,20 +12,6 @@ file(GLOB_RECURSE ALL_SOURCE_FILES - src/*.cpp src/*.h src/*.c - ) - --# Set CLANG_FORMAT_SUFFIX if you are using custom clang-format, e.g. 
clang-format-5.0 --find_program(CLANG_FORMAT_CMD clang-format${CLANG_FORMAT_SUFFIX}) -- --if (CLANG_FORMAT_CMD) -- add_custom_target( -- clangformat -- COMMAND ${CLANG_FORMAT_CMD} -- -i -style=file -fallback-style=none -- ${ALL_SOURCE_FILES} -- DEPENDS -- ${ALL_SOURCE_FILES} -- ) --endif(CLANG_FORMAT_CMD) -- - # thirdparty stuff - execute_process( - COMMAND mkdir ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 290d761..cd2cc92 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -120,10 +120,6 @@ if (${BUILD_SHARED_LIBS}) - target_compile_definitions(discord-rpc PRIVATE -DDISCORD_BUILDING_SDK) - endif(${BUILD_SHARED_LIBS}) - --if (CLANG_FORMAT_CMD) -- add_dependencies(discord-rpc clangformat) --endif(CLANG_FORMAT_CMD) -- - # install - - install( diff --git a/.patch/discord-rpc/0003-fix-cpp17.patch b/.patch/discord-rpc/0003-fix-cpp17.patch deleted file mode 100644 index 35b725d307..0000000000 --- a/.patch/discord-rpc/0003-fix-cpp17.patch +++ /dev/null @@ -1,31 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 540d643..5d12f3d 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -17,12 +17,14 @@ execute_process( - COMMAND mkdir ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty - ERROR_QUIET - ) -+# new commit that fixes c++17 -+set(RAPIDJSON_SHA 3b2441b87f99ab65f37b141a7b548ebadb607b96) - --find_file(RAPIDJSONTEST NAMES rapidjson rapidjson-1.1.0 PATHS ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty CMAKE_FIND_ROOT_PATH_BOTH) -+find_file(RAPIDJSONTEST NAMES rapidjson rapidjson-${RAPIDJSON_SHA} PATHS ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty CMAKE_FIND_ROOT_PATH_BOTH) - if (NOT RAPIDJSONTEST) - message("no rapidjson, download") -- set(RJ_TAR_FILE ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/v1.1.0.tar.gz) -- file(DOWNLOAD https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz ${RJ_TAR_FILE}) -+ set(RJ_TAR_FILE ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/${RAPIDJSON_SHA}.tar.gz) -+ file(DOWNLOAD 
https://github.com/miloyip/rapidjson/archive/${RAPIDJSON_SHA}.tar.gz ${RJ_TAR_FILE}) - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${RJ_TAR_FILE} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty -@@ -30,7 +32,7 @@ if (NOT RAPIDJSONTEST) - file(REMOVE ${RJ_TAR_FILE}) - endif(NOT RAPIDJSONTEST) - --find_file(RAPIDJSON NAMES rapidjson rapidjson-1.1.0 PATHS ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty CMAKE_FIND_ROOT_PATH_BOTH) -+find_file(RAPIDJSON NAMES rapidjson rapidjson-${RAPIDJSON_SHA} PATHS ${CMAKE_CURRENT_SOURCE_DIR}/thirdparty CMAKE_FIND_ROOT_PATH_BOTH) - - add_library(rapidjson STATIC IMPORTED ${RAPIDJSON}) - diff --git a/.patch/unordered-dense/0001-cmake.patch b/.patch/unordered-dense/0001-cmake.patch deleted file mode 100644 index 39e7794b1f..0000000000 --- a/.patch/unordered-dense/0001-cmake.patch +++ /dev/null @@ -1,22 +0,0 @@ -From e59d30b7b12e1d04cc2fc9c6219e35bda447c17e Mon Sep 17 00:00:00 2001 -From: Lizzie <159065448+Lizzie841@users.noreply.github.com> -Date: Fri, 16 May 2025 04:12:13 +0100 -Subject: [PATCH] Update CMakeLists.txt - ---- - CMakeLists.txt | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index b5f4c4f..c5c6f31 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -24,7 +24,7 @@ target_include_directories( - - target_compile_features(unordered_dense INTERFACE cxx_std_17) - --if(_unordered_dense_is_toplevel_project) -+if(_unordered_dense_is_toplevel_project OR UNORDERED_DENSE_INSTALL) - # locations are provided by GNUInstallDirs - install( - TARGETS unordered_dense diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e23f8f87f..144e77684e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,21 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(PLATFORM_LINUX ON) endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CXX_CLANG ON) + if (MSVC) + set(CXX_CLANG_CL ON) + endif() +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CXX_GCC ON) +elseif 
(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set(CXX_CL ON) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + set(CXX_ICC ON) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") + set(CXX_APPLE ON) +endif() + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") if (PLATFORM_SUN) @@ -29,6 +44,77 @@ if (PLATFORM_SUN) endif() endif() +# Detect current compilation architecture and create standard definitions +# ======================================================================= + +include(CheckSymbolExists) +function(detect_architecture symbol arch) + if (NOT DEFINED ARCHITECTURE) + set(CMAKE_REQUIRED_QUIET 1) + check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch}) + unset(CMAKE_REQUIRED_QUIET) + + # The output variable needs to be unique across invocations otherwise + # CMake's crazy scope rules will keep it defined + if (ARCHITECTURE_${arch}) + set(ARCHITECTURE "${arch}" PARENT_SCOPE) + set(ARCHITECTURE_${arch} 1 PARENT_SCOPE) + add_definitions(-DARCHITECTURE_${arch}=1) + endif() + endif() +endfunction() + +if (NOT ENABLE_GENERIC) + if (MSVC) + detect_architecture("_M_AMD64" x86_64) + detect_architecture("_M_IX86" x86) + detect_architecture("_M_ARM" arm) + detect_architecture("_M_ARM64" arm64) + else() + detect_architecture("__x86_64__" x86_64) + detect_architecture("__i386__" x86) + detect_architecture("__arm__" arm) + detect_architecture("__aarch64__" arm64) + endif() +endif() + +if (NOT DEFINED ARCHITECTURE) + set(ARCHITECTURE "GENERIC") + set(ARCHITECTURE_GENERIC 1) + add_definitions(-DARCHITECTURE_GENERIC=1) +endif() + +message(STATUS "Target architecture: ${ARCHITECTURE}") + +if (MSVC AND ARCHITECTURE_x86) + message(FATAL_ERROR "Attempting to build with the x86 environment is not supported. 
\ + This can typically happen if you used the Developer Command Prompt from the start menu;\ + instead, run vcvars64.bat directly, located at C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Auxiliary/Build/vcvars64.bat") +endif() + +if (CXX_CLANG_CL) + add_compile_options( + # clang-cl prints literally 10000+ warnings without this + $<$:-Wno-unused-command-line-argument> + $<$:-Wno-unsafe-buffer-usage> + $<$:-Wno-unused-value> + $<$:-Wno-extra-semi-stmt> + $<$:-Wno-sign-conversion> + $<$:-Wno-reserved-identifier> + $<$:-Wno-deprecated-declarations> + $<$:-Wno-cast-function-type-mismatch> + $<$:/EHsc> # thanks microsoft + ) + + if (ARCHITECTURE_x86_64) + add_compile_options( + # Required CPU features for amd64 + $<$:-msse4.1> + $<$:-mcx16> + ) + endif() +endif() + set(CPM_SOURCE_CACHE ${CMAKE_SOURCE_DIR}/.cache/cpm) include(DownloadExternals) @@ -36,7 +122,7 @@ include(CMakeDependentOption) include(CTest) # Disable Warnings as Errors for MSVC -if (MSVC) +if (CXX_CL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3 /WX-") endif() @@ -58,7 +144,7 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_EXTERNAL_SDL2 "Compile external SDL2" ${EXT_DEFA cmake_dependent_option(ENABLE_LIBUSB "Enable the use of LibUSB" ON "NOT ANDROID" OFF) -option(ENABLE_OPENGL "Enable OpenGL" ON) +cmake_dependent_option(ENABLE_OPENGL "Enable OpenGL" ON "NOT WIN32 OR NOT ARCHITECTURE_arm64" OFF) mark_as_advanced(FORCE ENABLE_OPENGL) option(ENABLE_QT "Enable the Qt frontend" ON) @@ -212,54 +298,6 @@ if (NOT EXISTS ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list. 
file(WRITE ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json "") endif() -# Detect current compilation architecture and create standard definitions -# ======================================================================= - -include(CheckSymbolExists) -function(detect_architecture symbol arch) - if (NOT DEFINED ARCHITECTURE) - set(CMAKE_REQUIRED_QUIET 1) - check_symbol_exists("${symbol}" "" ARCHITECTURE_${arch}) - unset(CMAKE_REQUIRED_QUIET) - - # The output variable needs to be unique across invocations otherwise - # CMake's crazy scope rules will keep it defined - if (ARCHITECTURE_${arch}) - set(ARCHITECTURE "${arch}" PARENT_SCOPE) - set(ARCHITECTURE_${arch} 1 PARENT_SCOPE) - add_compile_definitions(ARCHITECTURE_${arch}=1) - endif() - endif() -endfunction() - -if (NOT ENABLE_GENERIC) - if (MSVC) - detect_architecture("_M_AMD64" x86_64) - detect_architecture("_M_IX86" x86) - detect_architecture("_M_ARM" arm) - detect_architecture("_M_ARM64" arm64) - else() - detect_architecture("__x86_64__" x86_64) - detect_architecture("__i386__" x86) - detect_architecture("__arm__" arm) - detect_architecture("__aarch64__" arm64) - endif() -endif() - -if (NOT DEFINED ARCHITECTURE) - set(ARCHITECTURE "GENERIC") - set(ARCHITECTURE_GENERIC 1) - add_compile_definitions(ARCHITECTURE_GENERIC=1) -endif() - -message(STATUS "Target architecture: ${ARCHITECTURE}") - -if (MSVC AND ARCHITECTURE_x86) - message(FATAL_ERROR "Attempting to build with the x86 environment is not supported. 
\ - This can typically happen if you used the Developer Command Prompt from the start menu;\ - instead, run vcvars64.bat directly, located at C:/Program Files/Microsoft Visual Studio/2022/Community/VC/Auxiliary/Build/vcvars64.bat") -endif() - if (UNIX) add_compile_definitions(YUZU_UNIX=1) endif() @@ -274,7 +312,7 @@ if (YUZU_ROOM) endif() # Build/optimization presets -if (PLATFORM_LINUX) +if (PLATFORM_LINUX OR CXX_CLANG) if (ARCHITECTURE_x86_64) set(YUZU_BUILD_PRESET "custom" CACHE STRING "Build preset to use. One of: custom, generic, v3, zen2, zen4, native") if (${YUZU_BUILD_PRESET} STREQUAL "generic") @@ -341,6 +379,7 @@ if (YUZU_USE_CPM) # boost set(BOOST_INCLUDE_LIBRARIES algorithm icl pool container heap asio headers process filesystem crc variant) + AddJsonPackage(boost) # really annoying thing where boost::headers doesn't work with cpm @@ -350,13 +389,10 @@ if (YUZU_USE_CPM) if (Boost_ADDED) if (MSVC OR ANDROID) add_compile_definitions(YUZU_BOOST_v1) - else() - message(WARNING "Using bundled Boost on a non-MSVC or Android system is not recommended. 
You are strongly encouraged to install Boost through your system's package manager.") endif() - if (NOT MSVC) + if (NOT MSVC OR CXX_CLANG) # boost sucks - # Solaris (and probably other NIXes) need explicit pthread definition if (PLATFORM_SUN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthreads") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthreads") @@ -409,6 +445,14 @@ if (YUZU_USE_CPM) # Opus AddJsonPackage(opus) + + if (Opus_ADDED) + if (MSVC AND CXX_CLANG) + target_compile_options(opus PRIVATE + -Wno-implicit-function-declaration + ) + endif() + endif() else() # Enforce the search mode of non-required packages for better and shorter failure messages find_package(fmt 8 REQUIRED) @@ -441,6 +485,10 @@ endif() # DiscordRPC if (USE_DISCORD_PRESENCE) + if (ARCHITECTURE_arm64) + add_compile_definitions(RAPIDJSON_ENDIAN=RAPIDJSON_LITTLEENDIAN) + endif() + AddJsonPackage(discord-rpc) target_include_directories(discord-rpc INTERFACE ${discord-rpc_SOURCE_DIR}/include) @@ -748,6 +796,27 @@ if (MSVC AND CMAKE_GENERATOR STREQUAL "Ninja") ) endif() +# Adjustments for clang-cl +if (MSVC AND CXX_CLANG) + if (ARCHITECTURE_x86_64) + set(FILE_ARCH x86_64) + elseif (ARCHITECTURE_arm64) + set(FILE_ARCH aarch64) + else() + message(FATAL_ERROR "clang-cl: Unsupported architecture ${ARCHITECTURE}") + endif() + + AddJsonPackage(llvm-mingw) + set(LIB_PATH "${llvm-mingw_SOURCE_DIR}/libclang_rt.builtins-${FILE_ARCH}.a") + + add_library(llvm-mingw-runtime STATIC IMPORTED) + set_target_properties(llvm-mingw-runtime PROPERTIES + IMPORTED_LOCATION "${LIB_PATH}" + ) + + link_libraries(llvm-mingw-runtime) +endif() + if (YUZU_USE_FASTER_LD AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # We will assume that if the compiler is GCC, it will attempt to use ld.bfd by default. # Try to pick a faster linker. 
diff --git a/CMakeModules/DownloadExternals.cmake b/CMakeModules/DownloadExternals.cmake index de45d15d2a..88fa183061 100644 --- a/CMakeModules/DownloadExternals.cmake +++ b/CMakeModules/DownloadExternals.cmake @@ -11,10 +11,17 @@ function(download_bundled_external remote_path lib_name cpm_key prefix_var versi set(package_repo "no_platform") set(package_extension "no_platform") + # TODO(crueter): Need to convert ffmpeg to a CI. if (WIN32 OR FORCE_WIN_ARCHIVES) - set(CACHE_KEY "windows") - set(package_repo "ext-windows-bin/raw/master/") - set(package_extension ".7z") + if (ARCHITECTURE_arm64) + set(CACHE_KEY "windows") + set(package_repo "ext-windows-arm64-bin/raw/master/") + set(package_extension ".zip") + elseif(ARCHITECTURE_x86_64) + set(CACHE_KEY "windows") + set(package_repo "ext-windows-bin/raw/master/") + set(package_extension ".7z") + endif() elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") set(CACHE_KEY "linux") set(package_repo "ext-linux-bin/raw/master/") diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 3b8e996751..bcb5dc466a 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -35,4 +35,6 @@ set(REPO_NAME "Eden") set(BUILD_ID ${GIT_BRANCH}) set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ") +set(CXX_COMPILER "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") + configure_file(scm_rev.cpp.in scm_rev.cpp @ONLY) diff --git a/CMakeModules/WindowsCopyFiles.cmake b/CMakeModules/WindowsCopyFiles.cmake index 08b598365d..8d37bd5c2c 100644 --- a/CMakeModules/WindowsCopyFiles.cmake +++ b/CMakeModules/WindowsCopyFiles.cmake @@ -12,16 +12,25 @@ set(__windows_copy_files YES) # Any number of files to copy from SOURCE_DIR to DEST_DIR can be specified after DEST_DIR. # This copying happens post-build. 
-function(windows_copy_files TARGET SOURCE_DIR DEST_DIR) - # windows commandline expects the / to be \ so switch them - string(REPLACE "/" "\\\\" SOURCE_DIR ${SOURCE_DIR}) - string(REPLACE "/" "\\\\" DEST_DIR ${DEST_DIR}) +if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + function(windows_copy_files TARGET SOURCE_DIR DEST_DIR) + # windows commandline expects the / to be \ so switch them + string(REPLACE "/" "\\\\" SOURCE_DIR ${SOURCE_DIR}) + string(REPLACE "/" "\\\\" DEST_DIR ${DEST_DIR}) - # /NJH /NJS /NDL /NFL /NC /NS /NP - Silence any output - # cmake adds an extra check for command success which doesn't work too well with robocopy - # so trick it into thinking the command was successful with the || cmd /c "exit /b 0" - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${DEST_DIR} - COMMAND robocopy ${SOURCE_DIR} ${DEST_DIR} ${ARGN} /NJH /NJS /NDL /NFL /NC /NS /NP || cmd /c "exit /b 0" - ) -endfunction() + # /NJH /NJS /NDL /NFL /NC /NS /NP - Silence any output + # cmake adds an extra check for command success which doesn't work too well with robocopy + # so trick it into thinking the command was successful with the || cmd /c "exit /b 0" + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${DEST_DIR} + COMMAND robocopy ${SOURCE_DIR} ${DEST_DIR} ${ARGN} /NJH /NJS /NDL /NFL /NC /NS /NP || cmd /c "exit /b 0" + ) + endfunction() +else() + function(windows_copy_files TARGET SOURCE_DIR DEST_DIR) + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${DEST_DIR} + COMMAND cp -ra ${SOURCE_DIR}/. 
${DEST_DIR} + ) + endfunction() +endif() diff --git a/cpmfile.json b/cpmfile.json index c720b69e89..e071e0a8b8 100644 --- a/cpmfile.json +++ b/cpmfile.json @@ -11,10 +11,15 @@ "package": "Boost", "repo": "boostorg/boost", "tag": "boost-%VERSION%", - "artifact": "%TAG%-cmake.7z", - "hash": "e5b049e5b61964480ca816395f63f95621e66cb9bcf616a8b10e441e0e69f129e22443acb11e77bc1e8170f8e4171b9b7719891efc43699782bfcd4b3a365f01", - "git_version": "1.88.0", - "version": "1.57" + "artifact": "%TAG%-cmake.tar.xz", + "hash": "4fb7f6fde92762305aad8754d7643cd918dd1f3f67e104e9ab385b18c73178d72a17321354eb203b790b6702f2cf6d725a5d6e2dfbc63b1e35f9eb59fb42ece9", + "git_version": "1.89.0", + "version": "1.57", + "patches": [ + "0001-clang-cl.patch", + "0002-use-marmasm.patch", + "0003-armasm-options.patch" + ] }, "fmt": { "repo": "fmtlib/fmt", @@ -77,16 +82,13 @@ }, "opus": { "package": "Opus", - "repo": "xiph/opus", - "sha": "5ded705cf4", - "hash": "0dc89e58ddda1f3bc6a7037963994770c5806c10e66f5cc55c59286fc76d0544fe4eca7626772b888fd719f434bc8a92f792bdb350c807968b2ac14cfc04b203", + "repo": "crueter/opus", + "sha": "ab19c44fad", + "hash": "79d0d015b19e74ce6076197fc32b86fe91d724a0b5a79e86adfc4bdcb946ece384e252adbbf742b74d03040913b70bb0e9556eafa59ef20e42d2f3f4d6f2859a", "version": "1.3", "find_args": "MODULE", "options": [ - "OPUS_BUILD_TESTING OFF", - "OPUS_BUILD_PROGRAMS OFF", - "OPUS_INSTALL_PKG_CONFIG_MODULE OFF", - "OPUS_INSTALL_CMAKE_CONFIG_MODULE OFF" + "OPUS_PRESUME_NEON ON" ] }, "cubeb": { @@ -103,8 +105,8 @@ }, "boost_headers": { "repo": "boostorg/headers", - "sha": "0456900fad", - "hash": "50cd75dcdfc5f082225cdace058f47b4fb114a47585f7aee1d22236a910a80b667186254c214fa2fcebac67ae6d37ba4b6e695e1faea8affd6fd42a03cf996e3", + "sha": "95930ca8f5", + "hash": "d1dece16f3b209109de02123c537bfe1adf07a62b16c166367e7e5d62e0f7c323bf804c89b3192dd6871bc58a9d879d25a1cc3f7b9da0e497cf266f165816e2a", "bundled": true }, "discord-rpc": { @@ -143,5 +145,13 @@ "version": "2.32.8", "min_version": "2.26.4", 
"cmake_filename": "sdl2" + }, + "llvm-mingw": { + "repo": "misc/llvm-mingw", + "git_host": "git.crueter.xyz", + "tag": "20250828", + "version": "20250828", + "artifact": "clang-rt-builtins.tar.zst", + "hash": "d902392caf94e84f223766e2cc51ca5fab6cae36ab8dc6ef9ef6a683ab1c483bfcfe291ef0bd38ab16a4ecc4078344fa8af72da2f225ab4c378dee23f6186181" } } diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 6f64c79f5d..9f89cfc1f5 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -63,7 +63,14 @@ if (mbedtls_ADDED) if (NOT MSVC) target_compile_options(mbedcrypto PRIVATE -Wno-unused-but-set-variable - -Wno-string-concatenation) + -Wno-string-concatenation + ) + elseif(CXX_CLANG) + foreach(TARGET mbedtls mbedcrypto mbedx509) + target_compile_options(${TARGET} PRIVATE + -w + ) + endforeach() endif() endif() @@ -84,6 +91,8 @@ if(MSVC AND USE_CCACHE AND sirit_ADDED) list(FILTER _opts EXCLUDE REGEX "/Zi") list(APPEND _opts "/Z7") set_target_properties(sirit PROPERTIES COMPILE_OPTIONS "${_opts}") +elseif(MSVC AND CXX_CLANG) + target_compile_options(sirit PRIVATE -Wno-error=unused-command-line-argument) endif() # httplib @@ -136,8 +145,16 @@ add_subdirectory(nx_tzdb) # VMA AddJsonPackage(vulkan-memory-allocator) -if (VulkanMemoryAllocator_ADDED AND MSVC) - target_compile_options(VulkanMemoryAllocator INTERFACE /wd4189) +if (VulkanMemoryAllocator_ADDED) + if (CXX_CLANG) + target_compile_options(VulkanMemoryAllocator INTERFACE + -Wno-unused-variable + ) + elseif(MSVC) + target_compile_options(VulkanMemoryAllocator INTERFACE + /wd4189 + ) + endif() endif() if (NOT TARGET LLVM::Demangle) diff --git a/externals/cpmfile.json b/externals/cpmfile.json index f8ca528951..57258f771b 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -1,12 +1,9 @@ { "mbedtls": { - "repo": "Mbed-TLS/mbedtls", - "sha": "8c88150ca1", - "hash": 
"769ad1e94c570671071e1f2a5c0f1027e0bf6bcdd1a80ea8ac970f2c86bc45ce4e31aa88d6d8110fc1bed1de81c48bc624df1b38a26f8b340a44e109d784a966", - "find_args": "MODULE", - "patches": [ - "0001-cmake-version.patch" - ] + "repo": "eden-emulator/mbedtls", + "sha": "ce4f81f4a9", + "hash": "f2e7f887651b28745e508149214d409fd7cfdb92cb94b4146b47ff1e0fc09e47143f203ac18e34c2c1814b5bd031d04c74828676c0d4342920a2ddb7fd35e9a5", + "find_args": "MODULE" }, "spirv-headers": { "package": "SPIRV-Headers", @@ -29,18 +26,12 @@ }, "cpp-jwt": { "version": "1.4", - "repo": "arun11299/cpp-jwt", - "sha": "a54fa08a3b", - "hash": "a90f7e594ada0c7e49d5ff9211c71097534e7742a8e44bf0851b0362642a7271d53f5d83d04eeaae2bad17ef3f35e09e6818434d8eaefa038f3d1f7359d0969a", + "repo": "crueter/cpp-jwt", + "sha": "9eaea6328f", + "hash": "e237d92c59ebbf0dc8ac0bae3bc80340e1e9cf430e1c1c9638443001118e16de2b3e9036ac4b98105427667b0386d97831415170b68c432438dcad9ef8052de7", "find_args": "CONFIG", "options": [ - "CPP_JWT_BUILD_EXAMPLES OFF", - "CPP_JWT_BUILD_TESTS OFF", "CPP_JWT_USE_VENDORED_NLOHMANN_JSON OFF" - ], - "patches": [ - "0001-no-install.patch", - "0002-missing-decl.patch" ] }, "vulkan-utility-headers": { @@ -107,12 +98,6 @@ "repo": "martinus/unordered_dense", "sha": "73f3cbb237", "hash": "c08c03063938339d61392b687562909c1a92615b6ef39ec8df19ea472aa6b6478e70d7d5e33d4a27b5d23f7806daf57fe1bacb8124c8a945c918c7663a9e8532", - "find_args": "CONFIG", - "options": [ - "UNORDERED_DENSE_INSTALL OFF" - ], - "patches": [ - "0001-cmake.patch" - ] + "find_args": "CONFIG" } } diff --git a/externals/sse2neon/sse2neon.h b/externals/sse2neon/sse2neon.h index 66b93c1c74..67ad0ae6f8 100755 --- a/externals/sse2neon/sse2neon.h +++ b/externals/sse2neon/sse2neon.h @@ -183,7 +183,7 @@ } /* Compiler barrier */ -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) #define SSE2NEON_BARRIER() _ReadWriteBarrier() #else #define SSE2NEON_BARRIER() \ @@ -859,7 +859,7 @@ FORCE_INLINE uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t 
_b) { poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0); poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0); -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) __n64 a1 = {a}, b1 = {b}; return vreinterpretq_u64_p128(vmull_p64(a1, b1)); #else @@ -1770,7 +1770,7 @@ FORCE_INLINE void _mm_free(void *addr) FORCE_INLINE uint64_t _sse2neon_get_fpcr(void) { uint64_t value; -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) value = _ReadStatusReg(ARM64_FPCR); #else __asm__ __volatile__("mrs %0, FPCR" : "=r"(value)); /* read */ @@ -1780,7 +1780,7 @@ FORCE_INLINE uint64_t _sse2neon_get_fpcr(void) FORCE_INLINE void _sse2neon_set_fpcr(uint64_t value) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) _WriteStatusReg(ARM64_FPCR, value); #else __asm__ __volatile__("msr FPCR, %0" ::"r"(value)); /* write */ @@ -2249,7 +2249,7 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) FORCE_INLINE void _mm_prefetch(char const *p, int i) { (void) i; -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) switch (i) { case _MM_HINT_NTA: __prefetch2(p, 1); @@ -4820,7 +4820,7 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause FORCE_INLINE void _mm_pause(void) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) __isb(_ARM64_BARRIER_SY); #else __asm__ __volatile__("isb\n"); @@ -5716,7 +5716,7 @@ FORCE_INLINE __m128d _mm_undefined_pd(void) #pragma GCC diagnostic ignored "-Wuninitialized" #endif __m128d a; -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) a = _mm_setzero_pd(); #endif return a; @@ -8130,7 +8130,7 @@ FORCE_INLINE int _sse2neon_sido_negative(int res, int lb, int imm8, int bound) FORCE_INLINE int _sse2neon_clz(unsigned int x) { -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) unsigned long cnt = 0; if (_BitScanReverse(&cnt, x)) return 31 - 
cnt; @@ -8142,7 +8142,7 @@ FORCE_INLINE int _sse2neon_clz(unsigned int x) FORCE_INLINE int _sse2neon_ctz(unsigned int x) { -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) unsigned long cnt = 0; if (_BitScanForward(&cnt, x)) return cnt; @@ -9058,7 +9058,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) // AESE does ShiftRows and SubBytes on A uint8x16_t u8 = vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)); -#ifndef _MSC_VER +#if !defined(_MSC_VER) || defined(__clang__) uint8x16_t dest = { // Undo ShiftRows step from AESE and extract X1 and X3 u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1) @@ -9245,7 +9245,7 @@ FORCE_INLINE uint64_t _rdtsc(void) * bits wide and it is attributed with the flag 'cap_user_time_short' * is true. */ -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !defined(__clang__) val = _ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 2)); #else __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(val)); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b1fbab6a59..eb66e55964 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,7 @@ set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS $<$:_DEBUG> $<$>:NDEBUG>) # Set compilation flags -if (MSVC) +if (MSVC AND NOT CXX_CLANG) set(CMAKE_CONFIGURATION_TYPES Debug Release CACHE STRING "" FORCE) # Silence "deprecation" warnings @@ -69,10 +69,6 @@ if (MSVC) /external:anglebrackets # Treats all headers included by #include
, where the header file is enclosed in angle brackets (< >), as external headers /external:W0 # Sets the default warning level to 0 for external headers, effectively disabling warnings for them. - # Warnings - /W4 - /WX- - /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled /we4189 # 'identifier': local variable is initialized but not referenced /we4265 # 'class': class has virtual functions, but destructor is not virtual @@ -97,6 +93,14 @@ if (MSVC) /wd4702 # unreachable code (when used with LTO) ) + if (NOT CXX_CLANG) + add_compile_options( + # Warnings + /W4 + /WX- + ) + endif() + if (USE_CCACHE OR YUZU_USE_PRECOMPILED_HEADERS) # when caching, we need to use /Z7 to downgrade debug info to use an older but more cacheable format # Precompiled headers are deleted if not using /Z7. See https://github.com/nanoant/CMakePCHCompiler/issues/21 @@ -118,9 +122,13 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) else() - add_compile_options( - -fwrapv + if (NOT MSVC) + add_compile_options( + -fwrapv + ) + endif() + add_compile_options( -Werror=all -Werror=extra -Werror=missing-declarations @@ -133,14 +141,19 @@ else() -Wno-missing-field-initializers ) - if (CMAKE_CXX_COMPILER_ID MATCHES Clang OR CMAKE_CXX_COMPILER_ID MATCHES IntelLLVM) # Clang or AppleClang + if (CXX_CLANG OR CXX_ICC) # Clang or AppleClang + if (NOT MSVC) + add_compile_options( + -Werror=shadow-uncaptured-local + -Werror=implicit-fallthrough + -Werror=type-limits + ) + endif() + add_compile_options( -Wno-braced-scalar-init -Wno-unused-private-field -Wno-nullability-completeness - -Werror=shadow-uncaptured-local - -Werror=implicit-fallthrough - -Werror=type-limits ) endif() @@ -148,12 +161,12 @@ else() add_compile_options("-mcx16") endif() - if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) + if (APPLE AND CXX_CLANG) 
add_compile_options("-stdlib=libc++") endif() # GCC bugs - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11" AND CXX_GCC) # These diagnostics would be great if they worked, but are just completely broken # and produce bogus errors on external libraries like fmt. add_compile_options( diff --git a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp index d799f3f06f..b874f87ae6 100644 --- a/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp +++ b/src/audio_core/adsp/apps/audio_renderer/audio_renderer.cpp @@ -193,7 +193,7 @@ void AudioRenderer::Main(std::stop_token stop_token) { } } - max_time = std::min(command_buffer.time_limit, max_time); + max_time = (std::min)(command_buffer.time_limit, max_time); command_list_processor.SetProcessTimeMax(max_time); if (index == 0) { diff --git a/src/audio_core/common/common.h b/src/audio_core/common/common.h index 6abd9be45e..eaf0c6470f 100644 --- a/src/audio_core/common/common.h +++ b/src/audio_core/common/common.h @@ -73,9 +73,9 @@ constexpr s32 HighestVoicePriority = 0; constexpr u32 BufferAlignment = 0x40; constexpr u32 WorkbufferAlignment = 0x1000; constexpr s32 FinalMixId = 0; -constexpr s32 InvalidDistanceFromFinalMix = std::numeric_limits::min(); +constexpr s32 InvalidDistanceFromFinalMix = (std::numeric_limits::min)(); constexpr s32 UnusedSplitterId = -1; -constexpr s32 UnusedMixId = std::numeric_limits::max(); +constexpr s32 UnusedMixId = (std::numeric_limits::max)(); constexpr u32 InvalidNodeId = 0xF0000000; constexpr s32 InvalidProcessOrder = -1; constexpr u32 MaxBiquadFilters = 2; diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h index 9e84a9c059..9016246fbf 100644 --- a/src/audio_core/device/audio_buffers.h +++ b/src/audio_core/device/audio_buffers.h @@ -51,7 +51,7 @@ public: */ void 
RegisterBuffers(boost::container::static_vector& out_buffers) { std::scoped_lock l{lock}; - const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), + const s32 to_register{(std::min)((std::min)(appended_count, BufferAppendLimit), BufferAppendLimit - registered_count)}; for (s32 i = 0; i < to_register; i++) { @@ -175,7 +175,7 @@ public: } size_t buffers_to_flush{ - std::min(static_cast(registered_count + appended_count), max_buffers)}; + (std::min)(static_cast(registered_count + appended_count), max_buffers)}; if (buffers_to_flush == 0) { return 0; } diff --git a/src/audio_core/renderer/audio_device.cpp b/src/audio_core/renderer/audio_device.cpp index 5be5594f6f..387d23b0b4 100644 --- a/src/audio_core/renderer/audio_device.cpp +++ b/src/audio_core/renderer/audio_device.cpp @@ -45,7 +45,7 @@ u32 AudioDevice::ListAudioDeviceName(std::span out_buffer) cons names = device_names; } - const u32 out_count{static_cast(std::min(out_buffer.size(), names.size()))}; + const u32 out_count{static_cast((std::min)(out_buffer.size(), names.size()))}; for (u32 i = 0; i < out_count; i++) { out_buffer[i] = names[i]; } @@ -53,7 +53,7 @@ u32 AudioDevice::ListAudioDeviceName(std::span out_buffer) cons } u32 AudioDevice::ListAudioOutputDeviceName(std::span out_buffer) const { - const u32 out_count{static_cast(std::min(out_buffer.size(), output_device_names.size()))}; + const u32 out_count{static_cast((std::min)(out_buffer.size(), output_device_names.size()))}; for (u32 i = 0; i < out_count; i++) { out_buffer[i] = output_device_names[i]; diff --git a/src/audio_core/renderer/behavior/behavior_info.cpp b/src/audio_core/renderer/behavior/behavior_info.cpp index 0585390426..f139e63ffb 100644 --- a/src/audio_core/renderer/behavior/behavior_info.cpp +++ b/src/audio_core/renderer/behavior/behavior_info.cpp @@ -43,7 +43,7 @@ void BehaviorInfo::AppendError(const ErrorInfo& error) { } void BehaviorInfo::CopyErrorInfo(std::span out_errors, u32& out_count) const { - out_count = 
std::min(error_count, MaxErrors); + out_count = (std::min)(error_count, MaxErrors); for (size_t i = 0; i < MaxErrors; i++) { if (i < out_count) { diff --git a/src/audio_core/renderer/command/command_buffer.cpp b/src/audio_core/renderer/command/command_buffer.cpp index 67d43e69aa..f9e8575691 100644 --- a/src/audio_core/renderer/command/command_buffer.cpp +++ b/src/audio_core/renderer/command/command_buffer.cpp @@ -464,7 +464,7 @@ void CommandBuffer::GenerateDeviceSinkCommand(const s32 node_id, const s16 buffe s16 max_input{0}; for (u32 i = 0; i < parameter.input_count; i++) { cmd.inputs[i] = buffer_offset + parameter.inputs[i]; - max_input = std::max(max_input, cmd.inputs[i]); + max_input = (std::max)(max_input, cmd.inputs[i]); } if (state.upsampler_info != nullptr) { diff --git a/src/audio_core/renderer/command/command_generator.h b/src/audio_core/renderer/command/command_generator.h index 38ee2a64ee..497cfa92f2 100644 --- a/src/audio_core/renderer/command/command_generator.h +++ b/src/audio_core/renderer/command/command_generator.h @@ -56,11 +56,11 @@ public: // Voices u64 voice_size{0}; if (behavior.IsWaveBufferVer2Supported()) { - voice_size = std::max(std::max(sizeof(AdpcmDataSourceVersion2Command), + voice_size = (std::max)((std::max)(sizeof(AdpcmDataSourceVersion2Command), sizeof(PcmInt16DataSourceVersion2Command)), sizeof(PcmFloatDataSourceVersion2Command)); } else { - voice_size = std::max(std::max(sizeof(AdpcmDataSourceVersion1Command), + voice_size = (std::max)((std::max)(sizeof(AdpcmDataSourceVersion1Command), sizeof(PcmInt16DataSourceVersion1Command)), sizeof(PcmFloatDataSourceVersion1Command)); } @@ -82,7 +82,7 @@ public: // Sinks size += - params.sinks * std::max(sizeof(DeviceSinkCommand), sizeof(CircularBufferSinkCommand)); + params.sinks * (std::max)(sizeof(DeviceSinkCommand), sizeof(CircularBufferSinkCommand)); // Performance size += (params.effects + params.voices + params.sinks + params.sub_mixes + 1 + diff --git 
a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index 905613a5a8..b42ecb961f 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -29,8 +29,8 @@ constexpr std::array PitchBySrcQuality = {4, 8, 4}; template static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { - constexpr s32 min{std::numeric_limits::min()}; - constexpr s32 max{std::numeric_limits::max()}; + constexpr s32 min{(std::numeric_limits::min)()}; + constexpr s32 max{(std::numeric_limits::max)()}; if (req.buffer == 0 || req.buffer_size == 0) { return 0; @@ -41,7 +41,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, } auto samples_to_decode{ - std::min(req.samples_to_read, req.end_offset - req.start_offset - req.offset)}; + (std::min)(req.samples_to_read, req.end_offset - req.start_offset - req.offset)}; u32 channel_count{static_cast(req.channel_count)}; switch (req.channel_count) { @@ -55,7 +55,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { auto sample{static_cast(samples[i * channel_count + req.target_channel] * - std::numeric_limits::max())}; + (std::numeric_limits::max)())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { @@ -79,7 +79,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { auto sample{static_cast(samples[i * channel_count + req.target_channel] * - std::numeric_limits::max())}; + (std::numeric_limits::max)())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { @@ -125,7 +125,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, } auto start_pos{req.start_offset + req.offset}; - auto 
samples_to_process{std::min(req.end_offset - start_pos, req.samples_to_read)}; + auto samples_to_process{(std::min)(req.end_offset - start_pos, req.samples_to_read)}; if (samples_to_process == 0) { return 0; } @@ -139,7 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, position_in_frame += 2; } - const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; + const auto size{(std::max)((samples_to_process / 8U) * SamplesPerFrame, 8U)}; Core::Memory::CpuGuestMemory wavebuffer( memory, req.buffer + position_in_frame / 2, size); @@ -260,7 +260,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf auto max_remaining_sample_count{ ((Common::FixedPoint<17, 15>(TempBufferSize) - fraction) / sample_rate_ratio) .to_uint_floor()}; - max_remaining_sample_count = std::min(max_remaining_sample_count, remaining_sample_count); + max_remaining_sample_count = (std::min)(max_remaining_sample_count, remaining_sample_count); auto wavebuffers_consumed{voice_state.wave_buffers_consumed}; auto wavebuffer_index{voice_state.wave_buffer_index}; @@ -273,7 +273,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf std::array temp_buffer{}; while (remaining_sample_count > 0) { - const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; + const auto samples_to_write{(std::min)(remaining_sample_count, max_remaining_sample_count)}; const auto samples_to_read{ (fraction + samples_to_write * sample_rate_ratio).to_uint_floor()}; diff --git a/src/audio_core/renderer/command/effect/aux_.cpp b/src/audio_core/renderer/command/effect/aux_.cpp index 74d9c229f3..cb23007a66 100644 --- a/src/audio_core/renderer/command/effect/aux_.cpp +++ b/src/audio_core/renderer/command/effect/aux_.cpp @@ -86,7 +86,7 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, u32 write_count{write_count_}; u32 read_pos{0}; while (write_count > 0) { - u32 
to_write{std::min(count_max - target_write_offset, write_count)}; + u32 to_write{(std::min)(count_max - target_write_offset, write_count)}; if (to_write > 0) { const auto write_addr = send_buffer + target_write_offset * sizeof(s32); memory.WriteBlockUnsafe(write_addr, &input[read_pos], to_write * sizeof(s32)); @@ -157,7 +157,7 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, u32 read_count{read_count_}; u32 write_pos{0}; while (read_count > 0) { - u32 to_read{std::min(count_max - target_read_offset, read_count)}; + u32 to_read{(std::min)(count_max - target_read_offset, read_count)}; if (to_read > 0) { const auto read_addr = return_buffer + target_read_offset * sizeof(s32); memory.ReadBlockUnsafe(read_addr, &output[write_pos], to_read * sizeof(s32)); diff --git a/src/audio_core/renderer/command/effect/biquad_filter.cpp b/src/audio_core/renderer/command/effect/biquad_filter.cpp index 3392e7747d..dbc7085d16 100644 --- a/src/audio_core/renderer/command/effect/biquad_filter.cpp +++ b/src/audio_core/renderer/command/effect/biquad_filter.cpp @@ -20,8 +20,8 @@ namespace AudioCore::Renderer { void ApplyBiquadFilterFloat(std::span output, std::span input, std::array& b_, std::array& a_, VoiceState::BiquadFilterState& state, const u32 sample_count) { - constexpr f64 min{std::numeric_limits::min()}; - constexpr f64 max{std::numeric_limits::max()}; + constexpr f64 min{(std::numeric_limits::min)()}; + constexpr f64 max{(std::numeric_limits::max)()}; std::array b{Common::FixedPoint<50, 14>::from_base(b_[0]).to_double(), Common::FixedPoint<50, 14>::from_base(b_[1]).to_double(), Common::FixedPoint<50, 14>::from_base(b_[2]).to_double()}; @@ -61,8 +61,8 @@ void ApplyBiquadFilterFloat(std::span output, std::span input, static void ApplyBiquadFilterInt(std::span output, std::span input, std::array& b, std::array& a, VoiceState::BiquadFilterState& state, const u32 sample_count) { - constexpr s64 min{std::numeric_limits::min()}; - constexpr s64 
max{std::numeric_limits::max()}; + constexpr s64 min{(std::numeric_limits::min)()}; + constexpr s64 max{(std::numeric_limits::max)()}; for (u32 i = 0; i < sample_count; i++) { const s64 in_sample{input[i]}; diff --git a/src/audio_core/renderer/command/effect/capture.cpp b/src/audio_core/renderer/command/effect/capture.cpp index f235ce027f..95bc88e464 100644 --- a/src/audio_core/renderer/command/effect/capture.cpp +++ b/src/audio_core/renderer/command/effect/capture.cpp @@ -79,7 +79,7 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr send_in u32 write_count{write_count_}; u32 write_pos{0}; while (write_count > 0) { - u32 to_write{std::min(count_max - target_write_offset, write_count)}; + u32 to_write{(std::min)(count_max - target_write_offset, write_count)}; if (to_write > 0) { memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32), diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp index ecfdfabc61..14e77b3cb2 100644 --- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp +++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp @@ -76,9 +76,9 @@ static void UpdateI3dl2ReverbEffectParameter(const I3dl2ReverbInfo::ParameterVer state.dry_gain = params.dry_gain; Common::FixedPoint<50, 14> early_gain{ - std::min(params.room_gain + params.reflection_gain, 5000.0f) / 2000.0f}; + (std::min)(params.room_gain + params.reflection_gain, 5000.0f) / 2000.0f}; state.early_gain = pow_10(early_gain.to_float()); - Common::FixedPoint<50, 14> late_gain{std::min(params.room_gain + params.reverb_gain, 5000.0f) / + Common::FixedPoint<50, 14> late_gain{(std::min)(params.room_gain + params.reverb_gain, 5000.0f) / 2000.0f}; state.late_gain = pow_10(late_gain.to_float()); @@ -94,7 +94,7 @@ static void UpdateI3dl2ReverbEffectParameter(const I3dl2ReverbInfo::ParameterVer const Common::FixedPoint<50, 14> c{ std::sqrt(std::pow(b.to_float(), 2.0f) + 
(std::pow(a.to_float(), 2.0f) * -4.0f))}; - state.lowpass_1 = std::min(((b - c) / (a * 2.0f)).to_float(), 0.99723f); + state.lowpass_1 = (std::min)(((b - c) / (a * 2.0f)).to_float(), 0.99723f); state.lowpass_2 = 1.0f - state.lowpass_1; } diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp index 63aa06f5c3..3488dd37b5 100644 --- a/src/audio_core/renderer/command/effect/light_limiter.cpp +++ b/src/audio_core/renderer/command/effect/light_limiter.cpp @@ -50,8 +50,8 @@ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& p std::span> inputs, std::span> outputs, const u32 sample_count, LightLimiterInfo::StatisticsInternal* statistics) { - constexpr s64 min{std::numeric_limits::min()}; - constexpr s64 max{std::numeric_limits::max()}; + constexpr s64 min{(std::numeric_limits::min)()}; + constexpr s64 max{(std::numeric_limits::max)()}; const auto recip_estimate = [](f64 a) -> f64 { s32 q, s; @@ -117,9 +117,9 @@ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& p if (statistics) { statistics->channel_max_sample[channel] = - std::max(statistics->channel_max_sample[channel], abs_sample.to_float()); + (std::max)(statistics->channel_max_sample[channel], abs_sample.to_float()); statistics->channel_compression_gain_min[channel] = - std::min(statistics->channel_compression_gain_min[channel], + (std::min)(statistics->channel_compression_gain_min[channel], state.compression_gain[channel].to_float()); } } diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 7f152a9629..67b893305a 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -94,7 +94,7 @@ static void UpdateReverbEffectParameter(const ReverbInfo::ParameterVersion2& par for (u32 i = 0; i < ReverbInfo::MaxDelayTaps; i++) { auto early_delay{ ((pre_delay_time + 
EarlyDelayTimes[params.early_mode][i]) * sample_rate).to_int()}; - early_delay = std::min(early_delay, state.pre_delay_line.sample_count_max); + early_delay = (std::min)(early_delay, state.pre_delay_line.sample_count_max); state.early_delay_times[i] = early_delay + 1; state.early_gains[i] = Common::FixedPoint<50, 14>::from_base(params.early_gain) * EarlyDelayGains[params.early_mode][i]; @@ -107,7 +107,7 @@ static void UpdateReverbEffectParameter(const ReverbInfo::ParameterVersion2& par auto pre_time{ ((pre_delay_time + EarlyDelayTimes[params.early_mode][10]) * sample_rate).to_int()}; - state.pre_delay_time = std::min(pre_time, state.pre_delay_line.sample_count_max); + state.pre_delay_time = (std::min)(pre_time, state.pre_delay_line.sample_count_max); if (!unk_initialized) { unk_value = cos((1280.0f / sample_rate).to_float()); @@ -117,13 +117,13 @@ static void UpdateReverbEffectParameter(const ReverbInfo::ParameterVersion2& par for (u32 i = 0; i < ReverbInfo::MaxDelayLines; i++) { const auto fdn_delay{(FdnDelayTimes[params.late_mode][i] * sample_rate).to_int()}; state.fdn_delay_lines[i].sample_count = - std::min(fdn_delay, state.fdn_delay_lines[i].sample_count_max); + (std::min)(fdn_delay, state.fdn_delay_lines[i].sample_count_max); state.fdn_delay_lines[i].buffer_end = &state.fdn_delay_lines[i].buffer[state.fdn_delay_lines[i].sample_count - 1]; const auto decay_delay{(DecayDelayTimes[params.late_mode][i] * sample_rate).to_int()}; state.decay_delay_lines[i].sample_count = - std::min(decay_delay, state.decay_delay_lines[i].sample_count_max); + (std::min)(decay_delay, state.decay_delay_lines[i].sample_count_max); state.decay_delay_lines[i].buffer_end = &state.decay_delay_lines[i].buffer[state.decay_delay_lines[i].sample_count - 1]; diff --git a/src/audio_core/renderer/command/mix/depop_for_mix_buffers.cpp b/src/audio_core/renderer/command/mix/depop_for_mix_buffers.cpp index caedb56b79..f80fb92631 100644 --- 
a/src/audio_core/renderer/command/mix/depop_for_mix_buffers.cpp +++ b/src/audio_core/renderer/command/mix/depop_for_mix_buffers.cpp @@ -43,7 +43,7 @@ void DepopForMixBuffersCommand::Dump( } void DepopForMixBuffersCommand::Process(const AudioRenderer::CommandListProcessor& processor) { - auto end_index{std::min(processor.buffer_count, input + count)}; + auto end_index{(std::min)(processor.buffer_count, input + count)}; std::span depop_buff{reinterpret_cast(depop_buffer), end_index}; for (u32 index = input; index < end_index; index++) { diff --git a/src/audio_core/renderer/command/resample/upsample.cpp b/src/audio_core/renderer/command/resample/upsample.cpp index 691d70390f..add975504c 100644 --- a/src/audio_core/renderer/command/resample/upsample.cpp +++ b/src/audio_core/renderer/command/resample/upsample.cpp @@ -215,7 +215,7 @@ auto UpsampleCommand::Dump([[maybe_unused]] const AudioRenderer::CommandListProc void UpsampleCommand::Process(const AudioRenderer::CommandListProcessor& processor) { const auto info{reinterpret_cast(upsampler_info)}; - const auto input_count{std::min(info->input_count, buffer_count)}; + const auto input_count{(std::min)(info->input_count, buffer_count)}; const std::span inputs_{reinterpret_cast(inputs), input_count}; for (u32 i = 0; i < input_count; i++) { diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp index e056d15a65..8ef48a26df 100644 --- a/src/audio_core/renderer/command/sink/circular_buffer.cpp +++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp @@ -21,8 +21,8 @@ void CircularBufferSinkCommand::Dump( } void CircularBufferSinkCommand::Process(const AudioRenderer::CommandListProcessor& processor) { - constexpr s32 min{std::numeric_limits::min()}; - constexpr s32 max{std::numeric_limits::max()}; + constexpr s32 min{(std::numeric_limits::min)()}; + constexpr s32 max{(std::numeric_limits::max)()}; std::array output{}; for (u32 channel = 0; channel < 
input_count; channel++) { diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp index 3480ed475c..86a257363b 100644 --- a/src/audio_core/renderer/command/sink/device.cpp +++ b/src/audio_core/renderer/command/sink/device.cpp @@ -20,8 +20,8 @@ void DeviceSinkCommand::Dump([[maybe_unused]] const AudioRenderer::CommandListPr } void DeviceSinkCommand::Process(const AudioRenderer::CommandListProcessor& processor) { - constexpr s32 min = std::numeric_limits::min(); - constexpr s32 max = std::numeric_limits::max(); + constexpr s32 min = (std::numeric_limits::min)(); + constexpr s32 max = (std::numeric_limits::max)(); auto stream{processor.GetOutputSinkStream()}; stream->SetSystemChannels(input_count); diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp index c712610bbd..1103af910b 100644 --- a/src/audio_core/renderer/mix/mix_context.cpp +++ b/src/audio_core/renderer/mix/mix_context.cpp @@ -126,7 +126,7 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) { } auto sorted_results{node_states.GetSortedResuls()}; - const auto result_size{std::min(count, static_cast(sorted_results.second))}; + const auto result_size{(std::min)(count, static_cast(sorted_results.second))}; for (s32 i = 0; i < result_size; i++) { sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]]; } diff --git a/src/audio_core/renderer/sink/sink_info_base.h b/src/audio_core/renderer/sink/sink_info_base.h index e10d1cb382..2a7fd81f68 100644 --- a/src/audio_core/renderer/sink/sink_info_base.h +++ b/src/audio_core/renderer/sink/sink_info_base.h @@ -168,9 +168,9 @@ protected: /// Node id for this sink u32 node_id{}; /// State buffer for this sink - std::array state{}; + std::array state{}; /// Parameter buffer for this sink - std::array + std::array parameter{}; }; diff --git a/src/audio_core/renderer/splitter/splitter_context.cpp b/src/audio_core/renderer/splitter/splitter_context.cpp 
index d0f3b60c29..583cbaf735 100644 --- a/src/audio_core/renderer/splitter/splitter_context.cpp +++ b/src/audio_core/renderer/splitter/splitter_context.cpp @@ -170,7 +170,7 @@ void SplitterContext::RecomposeDestination(SplitterInfo& out_info, auto dest_count{info_header->destination_count}; if (!splitter_bug_fixed) { - dest_count = std::min(dest_count, GetDestCountPerInfoForCompat()); + dest_count = (std::min)(dest_count, GetDestCountPerInfoForCompat()); } if (dest_count == 0) { diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp index c30d68426c..c4a2768b93 100644 --- a/src/audio_core/renderer/system.cpp +++ b/src/audio_core/renderer/system.cpp @@ -718,7 +718,7 @@ u64 System::GenerateCommand(std::span in_command_buffer, const auto estimated_time{start_estimated_time - end_estimated_time}; - const auto time_limit{static_cast(std::max(dsp_time_limit + estimated_time, 0.0f))}; + const auto time_limit{static_cast((std::max)(dsp_time_limit + estimated_time, 0.0f))}; num_voices_dropped = DropVoices(command_buffer, static_cast(start_estimated_time), time_limit); } diff --git a/src/audio_core/sink/cubeb_sink.cpp b/src/audio_core/sink/cubeb_sink.cpp index a33162b806..a3a7a89ba4 100644 --- a/src/audio_core/sink/cubeb_sink.cpp +++ b/src/audio_core/sink/cubeb_sink.cpp @@ -73,7 +73,7 @@ public: minimum_latency = TargetSampleCount * 2; } - minimum_latency = std::max(minimum_latency, TargetSampleCount * 2); + minimum_latency = (std::max)(minimum_latency, TargetSampleCount * 2); LOG_INFO(Service_Audio, "Opening cubeb stream {} type {} with: rate {} channels {} (system channels {}) " @@ -372,7 +372,7 @@ u32 GetCubebLatency() { LOG_CRITICAL(Audio_Sink, "Error getting minimum latency, error: {}", latency_error); latency = TargetSampleCount * 2; } - latency = std::max(latency, TargetSampleCount * 2); + latency = (std::max)(latency, TargetSampleCount * 2); cubeb_destroy(ctx); return latency; } @@ -426,7 +426,7 @@ bool IsCubebSuitable() { 
LOG_ERROR(Audio_Sink, "Cubeb could not get min latency, it is not suitable."); return false; } - latency = std::max(latency, TargetSampleCount * 2); + latency = (std::max)(latency, TargetSampleCount * 2); // Test opening a device with standard parameters cubeb_devid output_device{0}; diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index c0078e6ddc..4d7f0c1d5d 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -31,8 +31,8 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span samples) { return; } - constexpr s32 min{std::numeric_limits::min()}; - constexpr s32 max{std::numeric_limits::max()}; + constexpr s32 min{(std::numeric_limits::min)()}; + constexpr s32 max{(std::numeric_limits::max)()}; auto yuzu_volume{Settings::Volume()}; if (yuzu_volume > 1.0f) { @@ -123,8 +123,8 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span samples) { } std::vector SinkStream::ReleaseBuffer(u64 num_samples) { - constexpr s32 min = std::numeric_limits::min(); - constexpr s32 max = std::numeric_limits::max(); + constexpr s32 min = (std::numeric_limits::min)(); + constexpr s32 max = (std::numeric_limits::max)(); auto samples{samples_buffer.Pop(num_samples)}; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9b898837bc..665143900a 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -237,7 +237,7 @@ else() ) # Get around GCC failing with intrinsics in Debug - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE MATCHES "Debug") + if(CXX_GCC AND CMAKE_BUILD_TYPE MATCHES "Debug") set_property( SOURCE stb.cpp APPEND @@ -245,7 +245,7 @@ else() endif() endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +if(CXX_CLANG) target_compile_options(common PRIVATE -fsized-deallocation -Werror=unreachable-code-aggressive) target_compile_definitions( diff --git a/src/common/free_region_manager.h b/src/common/free_region_manager.h index 
2e590d6094..39d52f866c 100644 --- a/src/common/free_region_manager.h +++ b/src/common/free_region_manager.h @@ -27,8 +27,8 @@ public: // If we are, join with them, ensuring we stay in bounds. if (it != m_free_regions.end()) { - start_address = std::min(start_address, it->lower()); - end_address = std::max(end_address, it->upper()); + start_address = (std::min)(start_address, it->lower()); + end_address = (std::max)(end_address, it->upper()); } // Free the relevant region. diff --git a/src/common/fs/path_util.cpp b/src/common/fs/path_util.cpp index a2f5cb92ff..318f311891 100644 --- a/src/common/fs/path_util.cpp +++ b/src/common/fs/path_util.cpp @@ -484,9 +484,9 @@ std::string GetParentPath(std::string_view path) { std::size_t name_index; if (name_bck_index == std::string_view::npos || name_fwd_index == std::string_view::npos) { - name_index = std::min(name_bck_index, name_fwd_index); + name_index = (std::min)(name_bck_index, name_fwd_index); } else { - name_index = std::max(name_bck_index, name_fwd_index); + name_index = (std::max)(name_bck_index, name_fwd_index); } return std::string(path.substr(0, name_index)); @@ -506,7 +506,7 @@ std::string_view GetPathWithoutTop(std::string_view path) { const auto name_bck_index = path.find('\\'); const auto name_fwd_index = path.find('/'); - return path.substr(std::min(name_bck_index, name_fwd_index) + 1); + return path.substr((std::min)(name_bck_index, name_fwd_index) + 1); } } // namespace Common::FS diff --git a/src/common/heap_tracker.cpp b/src/common/heap_tracker.cpp index c875683f0f..7cce54976e 100644 --- a/src/common/heap_tracker.cpp +++ b/src/common/heap_tracker.cpp @@ -144,8 +144,7 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p } // Clamp to end. - next = std::min(next, end); - + next = (std::min)(next, end); // Reprotect, if we need to. 
if (should_protect) { m_buffer.Protect(cur, next - cur, perm); @@ -211,8 +210,8 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() { // Despite being worse in theory, this has proven to be better in practice than more // regularly dumping a smaller amount, because it significantly reduces average case // lock contention. - const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2; - const size_t evict_count = m_resident_map_count - desired_count; + std::size_t const desired_count = (std::min)(m_resident_map_count, m_max_resident_map_count) / 2; + std::size_t const evict_count = m_resident_map_count - desired_count; auto it = m_resident_mappings.begin(); for (size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) { diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 15a198e216..1b7532b6b9 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -199,8 +199,8 @@ public: std::scoped_lock lock{placeholder_mutex}; auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end}); while (it != end) { - const size_t offset = std::max(it->lower(), virtual_offset); - const size_t protect_length = std::min(it->upper(), virtual_end) - offset; + const size_t offset = (std::max)(it->lower(), virtual_offset); + const size_t protect_length = (std::min)(it->upper(), virtual_end) - offset; DWORD old_flags{}; if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) { LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules"); @@ -266,8 +266,8 @@ private: } const size_t placeholder_begin = it->lower(); const size_t placeholder_end = it->upper(); - const size_t unmap_begin = std::max(virtual_offset, placeholder_begin); - const size_t unmap_end = std::min(virtual_offset + length, placeholder_end); + const size_t unmap_begin = (std::max)(virtual_offset, placeholder_begin); + const size_t unmap_end = (std::min)(virtual_offset + length, 
placeholder_end); ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end); ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin); @@ -655,8 +655,8 @@ private: *virtual_offset = 0; *length = 0; } else { - *virtual_offset = std::max(intended_start, address_space_start); - *length = std::min(intended_end, address_space_end) - *virtual_offset; + *virtual_offset = (std::max)(intended_start, address_space_start); + *length = (std::min)(intended_end, address_space_end) - *virtual_offset; } } diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 6da9e5231a..252c83aa2c 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -18,7 +18,7 @@ constexpr const char* TrimSourcePath(std::string_view source) { const auto rfind = [source](const std::string_view match) { return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); }; - auto idx = std::max({rfind("src/"), rfind("src\\"), rfind("../"), rfind("..\\")}); + auto idx = (std::max)({rfind("src/"), rfind("src\\"), rfind("../"), rfind("..\\")}); return source.data() + idx; } diff --git a/src/common/math_util.h b/src/common/math_util.h index 1f5928c15f..f52a0a35ae 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -85,10 +85,10 @@ struct Rectangle { } [[nodiscard]] constexpr bool Intersect(const Rectangle& with, Rectangle* result) const { - result->left = std::max(left, with.left); - result->top = std::max(top, with.top); - result->right = std::min(right, with.right); - result->bottom = std::min(bottom, with.bottom); + result->left = (std::max)(left, with.left); + result->top = (std::max)(top, with.top); + result->right = (std::min)(right, with.right); + result->bottom = (std::min)(bottom, with.bottom); return !result->IsEmpty(); } }; diff --git a/src/common/overflow.h b/src/common/overflow.h index e184ead953..d39fa24041 100644 --- a/src/common/overflow.h +++ b/src/common/overflow.h @@ -25,9 +25,9 @@ template inline bool 
CanAddWithoutOverflow(T lhs, T rhs) { #ifdef _MSC_VER if (lhs >= 0 && rhs >= 0) { - return WrappingAdd(lhs, rhs) >= std::max(lhs, rhs); + return WrappingAdd(lhs, rhs) >= (std::max)(lhs, rhs); } else if (lhs < 0 && rhs < 0) { - return WrappingAdd(lhs, rhs) <= std::min(lhs, rhs); + return WrappingAdd(lhs, rhs) <= (std::min)(lhs, rhs); } else { return true; } diff --git a/src/common/range_map.h b/src/common/range_map.h index ab73993e3b..e9cb50825b 100644 --- a/src/common/range_map.h +++ b/src/common/range_map.h @@ -18,7 +18,7 @@ private: public: explicit RangeMap(ValueT null_value_) : null_value{null_value_} { - container.emplace(std::numeric_limits::min(), null_value); + container.emplace((std::numeric_limits::min)(), null_value); }; ~RangeMap() = default; @@ -66,7 +66,7 @@ private: } const auto it_end = std::next(it); if (it_end == container.end()) { - return std::numeric_limits::max() - address; + return (std::numeric_limits::max)() - address; } return it_end->first - address; } diff --git a/src/common/range_sets.inc b/src/common/range_sets.inc index b83eceb7b0..3edd8c8a43 100644 --- a/src/common/range_sets.inc +++ b/src/common/range_sets.inc @@ -274,7 +274,7 @@ void OverlapRangeSet::Subtract(AddressType base_address, size_t siz template void OverlapRangeSet::DeleteAll(AddressType base_address, size_t size) { - m_impl->template Subtract(base_address, size, std::numeric_limits::max(), + m_impl->template Subtract(base_address, size, (std::numeric_limits::max)(), [](AddressType, AddressType) {}); } diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 86de96b43e..e97854f514 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -29,7 +29,7 @@ class RingBuffer { // T must be safely memcpy-able and have a trivial default constructor. static_assert(std::is_trivial_v); // Ensure capacity is sensible. 
- static_assert(capacity < std::numeric_limits::max() / 2); + static_assert(capacity < (std::numeric_limits::max)() / 2); static_assert((capacity & (capacity - 1)) == 0, "capacity must be a power of two"); // Ensure lock-free. static_assert(std::atomic_size_t::is_always_lock_free); @@ -43,9 +43,9 @@ public: std::lock_guard lock(rb_mutex); const std::size_t slots_free = capacity + read_index - write_index; - const std::size_t push_count = std::min(slot_count, slots_free); + const std::size_t push_count = (std::min)(slot_count, slots_free); const std::size_t pos = write_index % capacity; - const std::size_t first_copy = std::min(capacity - pos, push_count); + const std::size_t first_copy = (std::min)(capacity - pos, push_count); const std::size_t second_copy = push_count - first_copy; const char* in = static_cast(new_slots); @@ -69,9 +69,9 @@ public: std::lock_guard lock(rb_mutex); const std::size_t slots_filled = write_index - read_index; - const std::size_t pop_count = std::min(slots_filled, max_slots); + const std::size_t pop_count = (std::min)(slots_filled, max_slots); const std::size_t pos = read_index % capacity; - const std::size_t first_copy = std::min(capacity - pos, pop_count); + const std::size_t first_copy = (std::min)(capacity - pos, pop_count); const std::size_t second_copy = pop_count - first_copy; char* out = static_cast(output); @@ -84,7 +84,7 @@ public: } std::vector Pop(std::size_t max_slots = ~std::size_t(0)) { - std::vector out(std::min(max_slots, capacity)); + std::vector out((std::min)(max_slots, capacity)); const std::size_t count = Pop(out.data(), out.size()); out.resize(count); return out; diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index b6bff72867..a157d03878 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -1,12 +1,11 @@ +// SPDX-FileCopyrightText: 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // 
SPDX-License-Identifier: GPL-2.0-or-later #include "common/scm_rev.h" -#include -#include -#include - #define GIT_REV "@GIT_REV@" #define GIT_BRANCH "@GIT_BRANCH@" #define GIT_DESC "@GIT_DESC@" @@ -18,64 +17,21 @@ #define TITLE_BAR_FORMAT_IDLE "@TITLE_BAR_FORMAT_IDLE@" #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" #define IS_DEV_BUILD @IS_DEV_BUILD@ +#define COMPILER_ID "@CXX_COMPILER@" namespace Common { -const char* g_scm_rev; -const char* g_scm_branch; -const char* g_scm_desc; -const char g_build_name[] = BUILD_NAME; -const char g_build_date[] = BUILD_DATE; -const char g_build_fullname[] = BUILD_FULLNAME; -const char g_build_version[] = BUILD_VERSION; -const char g_build_id[] = BUILD_ID; -const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; -const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; -const bool g_is_dev_build = IS_DEV_BUILD; +constexpr const char g_scm_rev[] = GIT_REV; +constexpr const char g_scm_branch[] = GIT_BRANCH; +constexpr const char g_scm_desc[] = GIT_DESC; +constexpr const char g_build_name[] = BUILD_NAME; +constexpr const char g_build_date[] = BUILD_DATE; +constexpr const char g_build_fullname[] = BUILD_FULLNAME; +constexpr const char g_build_version[] = BUILD_VERSION; +constexpr const char g_build_id[] = BUILD_ID; +constexpr const char g_title_bar_format_idle[] = TITLE_BAR_FORMAT_IDLE; +constexpr const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; +constexpr const bool g_is_dev_build = IS_DEV_BUILD; +constexpr const char g_compiler_id[] = COMPILER_ID; -/// Anonymizes SCM data -/// This is quite weak. But better than nothing. 
-class scm_encrypt { - std::string m_scm_rev, m_scm_branch, m_scm_desc; - -public: - scm_encrypt() { - // Get a key that is easy to obtain when asking the person directly but (usually) hard to - // guess - std::string key; -#ifdef __linux__ - if (!std::getline(std::ifstream("/proc/sys/kernel/hostname"), key)) - key = "linux_error_key"; -#else - // Not a good fallback, but better than nothing I guess? - key = g_build_date; -#endif - // Copy strings in place - m_scm_rev = GIT_REV; - m_scm_branch = GIT_BRANCH; - m_scm_desc = GIT_DESC; - // XOR each string with key - auto key_it = key.begin(); - for (auto& string : {&m_scm_rev, &m_scm_branch, &m_scm_desc}) { - for (auto& c : *string) { - c ^= *key_it; - if (++key_it == key.end()) - key_it = key.begin(); - } - } - // Make each string human-readable - for (auto& string : {&m_scm_rev, &m_scm_branch, &m_scm_desc}) { - const std::string original = *string; - string->clear(); - for (const auto c : original) { - string->append(fmt::format("{:x}", unsigned(c))); - } - string->pop_back(); - } - // Set pointers - g_scm_rev = m_scm_rev.c_str(); - g_scm_branch = m_scm_branch.c_str(); - g_scm_desc = m_scm_desc.c_str(); - } -} scm_encrypt_instance; } // namespace Common diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h index ee1997950a..84356ad64a 100644 --- a/src/common/scm_rev.h +++ b/src/common/scm_rev.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -5,9 +8,9 @@ namespace Common { -extern const char* g_scm_rev; -extern const char* g_scm_branch; -extern const char* g_scm_desc; +extern const char g_scm_rev[]; +extern const char g_scm_branch[]; +extern const char g_scm_desc[]; extern const char g_build_name[]; extern const char g_build_date[]; extern const char g_build_fullname[]; @@ -17,5 +20,6 @@ extern const char g_title_bar_format_idle[]; extern 
const char g_title_bar_format_running[]; extern const char g_shader_cache_version[]; extern const bool g_is_dev_build; +extern const char g_compiler_id[]; } // namespace Common diff --git a/src/common/settings.h b/src/common/settings.h index 047dfc800a..9d448a2b38 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -37,14 +37,14 @@ struct ResolutionScalingInfo { if (value == 0) { return 0; } - return std::max((value * static_cast(up_scale)) >> static_cast(down_shift), 1); + return (std::max)((value * static_cast(up_scale)) >> static_cast(down_shift), 1); } u32 ScaleUp(u32 value) const { if (value == 0U) { return 0U; } - return std::max((value * up_scale) >> down_shift, 1U); + return (std::max)((value * up_scale) >> down_shift, 1U); } }; @@ -612,8 +612,8 @@ struct Values { false, true, &custom_rtc_enabled}; SwitchableSetting custom_rtc_offset{linkage, 0, - std::numeric_limits::min(), - std::numeric_limits::max(), + (std::numeric_limits::min)(), + (std::numeric_limits::max)(), "custom_rtc_offset", Category::System, Specialization::Countable, diff --git a/src/common/settings_setting.h b/src/common/settings_setting.h index 0b18ca5ecc..ce7a3e91a6 100644 --- a/src/common/settings_setting.h +++ b/src/common/settings_setting.h @@ -223,7 +223,7 @@ public: if constexpr (std::is_enum_v) { return EnumMetadata::Index(); } else { - return std::numeric_limits::max(); + return (std::numeric_limits::max)(); } } @@ -237,14 +237,14 @@ public: [[nodiscard]] std::string MinVal() const override final { if constexpr (std::is_arithmetic_v && !ranged) { - return this->ToString(std::numeric_limits::min()); + return this->ToString((std::numeric_limits::min)()); } else { return this->ToString(minimum); } } [[nodiscard]] std::string MaxVal() const override final { if constexpr (std::is_arithmetic_v && !ranged) { - return this->ToString(std::numeric_limits::max()); + return this->ToString((std::numeric_limits::max)()); } else { return this->ToString(maximum); } diff --git 
a/src/common/slot_vector.h b/src/common/slot_vector.h index 34ff7de941..8db4bba30b 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -17,7 +17,7 @@ namespace Common { struct SlotId { - static constexpr u32 INVALID_INDEX = std::numeric_limits::max(); + static constexpr u32 INVALID_INDEX = (std::numeric_limits::max)(); constexpr auto operator<=>(const SlotId&) const noexcept = default; diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 34cc1527bf..62a3115d5a 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -66,7 +66,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) { const auto scheduling_type = SCHED_OTHER; s32 max_prio = sched_get_priority_max(scheduling_type); s32 min_prio = sched_get_priority_min(scheduling_type); - u32 level = std::max(static_cast(new_priority) + 1, 4U); + u32 level = (std::max)(static_cast(new_priority) + 1, 4U); struct sched_param params; if (max_prio > min_prio) { @@ -101,7 +101,7 @@ void SetCurrentThreadName(const char* name) { #elif defined(__linux__) // Linux limits thread names to 15 characters and will outright reject any // attempt to set a longer name with ERANGE. 
- std::string truncated(name, std::min(strlen(name), static_cast(15))); + std::string truncated(name, (std::min)(strlen(name), static_cast(15))); if (int e = pthread_setname_np(pthread_self(), truncated.c_str())) { errno = e; LOG_ERROR(Common, "Failed to set thread name to '{}': {}", truncated, GetLastErrorMsg()); diff --git a/src/common/tiny_mt.h b/src/common/tiny_mt.h index 5d5ebf158c..a757591c9b 100644 --- a/src/common/tiny_mt.h +++ b/src/common/tiny_mt.h @@ -124,7 +124,7 @@ public: this->state.data[3] = ParamTmat; { - const int num_init_iterations = std::max(seed_count + 1, MinimumInitIterations) - 1; + const int num_init_iterations = (std::max)(seed_count + 1, MinimumInitIterations) - 1; GenerateInitialValuePlus(&this->state, 0, seed_count); diff --git a/src/common/uint128.h b/src/common/uint128.h index f450a6db99..56433096fe 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -20,7 +20,7 @@ namespace Common { // This function multiplies 2 u64 values and divides it by a u64 value. [[nodiscard]] static inline u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) u128 r{}; r[0] = _umul128(a, b, &r[1]); u64 remainder; @@ -41,7 +41,7 @@ namespace Common { // This function multiplies 2 u64 values and produces a u128 value; [[nodiscard]] static inline u128 Multiply64Into128(u64 a, u64 b) { u128 result; -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) result[0] = _umul128(a, b, &result[1]); #else unsigned __int128 tmp = a; @@ -65,7 +65,7 @@ namespace Common { #endif #else // This one is bit more inaccurate. 
- return MultiplyAndDivide64(std::numeric_limits::max(), numerator, divisor); + return MultiplyAndDivide64((std::numeric_limits::max)(), numerator, divisor); #endif } diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index 41d385f598..b578d75ece 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -24,7 +24,7 @@ constexpr auto PauseCycles = 100'000U; } // Anonymous namespace -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) __forceinline static void TPAUSE() { static constexpr auto RequestC02State = 0U; _tpause(RequestC02State, FencedRDTSC() + PauseCycles); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1979d427b5..0be60b55c6 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1187,6 +1187,7 @@ else() -Wno-cast-function-type $<$:-fsized-deallocation> + $<$:-Wno-cast-function-type-mismatch> ) endif() diff --git a/src/core/arm/debug.cpp b/src/core/arm/debug.cpp index 854509463b..20f1ea00df 100644 --- a/src/core/arm/debug.cpp +++ b/src/core/arm/debug.cpp @@ -283,9 +283,9 @@ Loader::AppLoader::Modules FindModules(Kernel::KProcess* process) { // Ignore leading directories. 
char* path_pointer = module_path.path.data(); char* path_end = - path_pointer + std::min(PathLengthMax, module_path.path_length); + path_pointer + (std::min)(PathLengthMax, module_path.path_length); - for (s32 i = 0; i < std::min(PathLengthMax, module_path.path_length) && + for (s32 i = 0; i < (std::min)(PathLengthMax, module_path.path_length) && module_path.path[i] != '\0'; i++) { if (module_path.path[i] == '/' || module_path.path[i] == '\\') { diff --git a/src/core/arm/dynarmic/dynarmic_cp15.cpp b/src/core/arm/dynarmic/dynarmic_cp15.cpp index c663adda19..0d5e5912ae 100644 --- a/src/core/arm/dynarmic/dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/dynarmic_cp15.cpp @@ -58,6 +58,8 @@ CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1 _mm_lfence(); #elif defined(ARCHITECTURE_x86_64) asm volatile("mfence\n\tlfence\n\t" : : : "memory"); +#elif defined(_MSC_VER) && defined(ARCHITECTURE_arm64) + _Memory_barrier(); #elif defined(ARCHITECTURE_arm64) asm volatile("dsb sy\n\t" : : : "memory"); #else @@ -75,6 +77,8 @@ CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1 _mm_mfence(); #elif defined(ARCHITECTURE_x86_64) asm volatile("mfence\n\t" : : : "memory"); +#elif defined(_MSC_VER) && defined(ARCHITECTURE_arm64) + _Memory_barrier(); #elif defined(ARCHITECTURE_arm64) asm volatile("dmb sy\n\t" : : : "memory"); #else diff --git a/src/core/arm/nce/interpreter_visitor.h b/src/core/arm/nce/interpreter_visitor.h index f90d876abb..9dfbdb2fe9 100644 --- a/src/core/arm/nce/interpreter_visitor.h +++ b/src/core/arm/nce/interpreter_visitor.h @@ -4,9 +4,14 @@ #pragma once +#include #include #include +#include +#include "core/hle/kernel/k_thread.h" +#include "core/memory.h" +#include "common/logging/log.h" #include "core/arm/nce/visitor_base.h" namespace Core { diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp index b8387ce7cb..9321258ae9 100644 --- a/src/core/arm/nce/patcher.cpp +++ 
b/src/core/arm/nce/patcher.cpp @@ -11,6 +11,8 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/svc.h" +#include "core/memory.h" +#include "core/hle/kernel/k_thread.h" namespace Core::NCE { diff --git a/src/core/core.cpp b/src/core/core.cpp index c2852e66f0..7315f35e0c 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -185,7 +185,7 @@ struct System::Impl { Service::PSC::Time::LocationName name{}; auto new_name = Settings::GetTimeZoneString(Settings::values.time_zone_index.GetValue()); - std::memcpy(name.data(), new_name.data(), std::min(name.size(), new_name.size())); + std::memcpy(name.data(), new_name.data(), (std::min)(name.size(), new_name.size())); timezone_service->SetDeviceLocationName(name); diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp index b60303412b..34e58463de 100644 --- a/src/core/crypto/xts_encryption_layer.cpp +++ b/src/core/crypto/xts_encryption_layer.cpp @@ -34,8 +34,8 @@ std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t o buffer.resize(XTS_SECTOR_SIZE); cipher.XTSTranscode(buffer.data(), buffer.size(), buffer.data(), offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); - std::memcpy(data, buffer.data(), std::min(buffer.size(), length)); - return std::min(buffer.size(), length); + std::memcpy(data, buffer.data(), (std::min)(buffer.size(), length)); + return (std::min)(buffer.size(), length); } // offset does not fall on block boundary (0x4000) diff --git a/src/core/debugger/gdbstub.cpp b/src/core/debugger/gdbstub.cpp index fcb5787147..5c3c045b3c 100644 --- a/src/core/debugger/gdbstub.cpp +++ b/src/core/debugger/gdbstub.cpp @@ -664,7 +664,7 @@ void GDBStub::HandleRcmd(const std::vector& command) { if (svc_mem_info.state != Kernel::Svc::MemoryState::Inaccessible || svc_mem_info.base_address + svc_mem_info.size - 1 != - std::numeric_limits::max()) { + (std::numeric_limits::max)()) { const char* state = 
GetMemoryStateName(svc_mem_info.state); const char* perm = GetMemoryPermissionString(svc_mem_info); const char l = True(svc_mem_info.attribute & MemoryAttribute::Locked) ? 'L' : '-'; @@ -710,7 +710,7 @@ std::vector::const_iterator GDBStub::CommandEnd() const { const auto end{std::find(current_command.begin(), current_command.end(), GDB_STUB_END)}; // Require the checksum to be present - return std::min(end + 2, current_command.end()); + return (std::min)(end + 2, current_command.end()); } std::optional GDBStub::DetachCommand() { diff --git a/src/core/debugger/gdbstub_arch.cpp b/src/core/debugger/gdbstub_arch.cpp index 452f565bec..ee7108376a 100644 --- a/src/core/debugger/gdbstub_arch.cpp +++ b/src/core/debugger/gdbstub_arch.cpp @@ -12,7 +12,7 @@ static T HexToValue(std::string_view hex) { static_assert(std::is_trivially_copyable_v); T value{}; const auto mem{Common::HexStringToVector(hex, false)}; - std::memcpy(&value, mem.data(), std::min(mem.size(), sizeof(T))); + std::memcpy(&value, mem.data(), (std::min)(mem.size(), sizeof(T))); return value; } diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index f104d495bb..52dff5df9a 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -388,7 +388,7 @@ void DeviceMemoryManager::WalkBlock(DAddr addr, std::size_t size, auto o while (remaining_size) { const size_t next_pages = static_cast(continuity_tracker[page_index]); const std::size_t copy_amount = - std::min((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size); + (std::min)((next_pages << Memory::YUZU_PAGEBITS) - page_offset, remaining_size); const auto current_vaddr = static_cast((page_index << Memory::YUZU_PAGEBITS) + page_offset); SCOPE_EXIT{ diff --git a/src/core/file_sys/fs_path_utility.h b/src/core/file_sys/fs_path_utility.h index cdfd8c7729..3af23b0bba 100644 --- a/src/core/file_sys/fs_path_utility.h +++ b/src/core/file_sys/fs_path_utility.h @@ -683,7 +683,7 @@ public: const 
auto max_mount_len = out_mount_name_buffer_size == 0 ? MountNameLengthMax + 1 - : std::min(MountNameLengthMax + 1, out_mount_name_buffer_size); + : (std::min)(MountNameLengthMax + 1, out_mount_name_buffer_size); // Parse the path until we see a drive separator size_t mount_len = 0; diff --git a/src/core/file_sys/fsa/fs_i_directory.h b/src/core/file_sys/fsa/fs_i_directory.h index c8e895eab0..a4adcd2beb 100644 --- a/src/core/file_sys/fsa/fs_i_directory.h +++ b/src/core/file_sys/fsa/fs_i_directory.h @@ -48,7 +48,7 @@ public: private: Result DoRead(s64* out_count, DirectoryEntry* out_entries, s64 max_entries) { const u64 actual_entries = - std::min(static_cast(max_entries), entries.size() - next_entry_index); + (std::min)(static_cast(max_entries), entries.size() - next_entry_index); const auto* begin = reinterpret_cast(entries.data() + next_entry_index); const auto* end = reinterpret_cast(entries.data() + next_entry_index + actual_entries); const auto range_size = static_cast(std::distance(begin, end)); diff --git a/src/core/file_sys/fsa/fs_i_file.h b/src/core/file_sys/fsa/fs_i_file.h index 1188ae8ca7..99468ef0e2 100644 --- a/src/core/file_sys/fsa/fs_i_file.h +++ b/src/core/file_sys/fsa/fs_i_file.h @@ -93,7 +93,7 @@ protected: R_TRY(this->DoGetSize(std::addressof(file_size))); R_UNLESS(offset <= file_size, ResultOutOfRange); - *out = static_cast(std::min(file_size - offset, static_cast(size))); + *out = static_cast((std::min)(file_size - offset, static_cast(size))); R_SUCCEED(); } diff --git a/src/core/file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp index bc1cddbb0c..c9fb5f64d6 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_ctr_counter_extended_storage.cpp @@ -213,7 +213,7 @@ size_t AesCtrCounterExtendedStorage::Read(u8* buffer, size_t size, size_t offset // Determine how much is left. 
const auto remaining_size = end_offset - cur_offset; - const auto cur_size = static_cast(std::min(remaining_size, data_size)); + const auto cur_size = static_cast((std::min)(remaining_size, data_size)); ASSERT(cur_size <= size); // If necessary, perform decryption. diff --git a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp index b65aca18d9..c18fde18f4 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp @@ -94,7 +94,7 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) { while (remaining > 0) { // Determine data we're writing and where. const size_t write_size = - use_work_buffer ? std::min(pooled_buffer.GetSize(), remaining) : remaining; + use_work_buffer ? (std::min)(pooled_buffer.GetSize(), remaining) : remaining; void* write_buf; if (use_work_buffer) { diff --git a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp index efc5aa0b12..5ef2544dfb 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp @@ -65,7 +65,7 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { // Determine the size of the pre-data read. const size_t skip_size = static_cast(offset - Common::AlignDown(offset, m_block_size)); - const size_t data_size = std::min(size, m_block_size - skip_size); + const size_t data_size = (std::min)(size, m_block_size - skip_size); // Decrypt into a pooled buffer. { @@ -84,14 +84,14 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { AddCounter(ctr.data(), IvSize, 1); processed_size += data_size; - ASSERT(processed_size == std::min(size, m_block_size - skip_size)); + ASSERT(processed_size == (std::min)(size, m_block_size - skip_size)); } // Decrypt aligned chunks. 
char* cur = reinterpret_cast(buffer) + processed_size; size_t remaining = size - processed_size; while (remaining > 0) { - const size_t cur_size = std::min(m_block_size, remaining); + const size_t cur_size = (std::min)(m_block_size, remaining); m_cipher->SetIV(ctr); m_cipher->Transcode(cur, cur_size, cur, Core::Crypto::Op::Decrypt); diff --git a/src/core/file_sys/fssystem/fssystem_alignment_matching_storage_impl.cpp b/src/core/file_sys/fssystem/fssystem_alignment_matching_storage_impl.cpp index 641c888aed..08b77d790a 100644 --- a/src/core/file_sys/fssystem/fssystem_alignment_matching_storage_impl.cpp +++ b/src/core/file_sys/fssystem/fssystem_alignment_matching_storage_impl.cpp @@ -104,7 +104,7 @@ size_t AlignmentMatchingStorageImpl::Read(VirtualFile base_storage, char* work_b while (remaining_tail_size > 0) { const auto aligned_tail_offset = Common::AlignDown(tail_offset, data_alignment); const auto cur_size = - std::min(static_cast(aligned_tail_offset + data_alignment - tail_offset), + (std::min)(static_cast(aligned_tail_offset + data_alignment - tail_offset), remaining_tail_size); base_storage->Read(reinterpret_cast(work_buf), data_alignment, aligned_tail_offset); @@ -186,7 +186,7 @@ size_t AlignmentMatchingStorageImpl::Write(VirtualFile base_storage, char* work_ const auto aligned_tail_offset = Common::AlignDown(tail_offset, data_alignment); const auto cur_size = - std::min(static_cast(aligned_tail_offset + data_alignment - tail_offset), + (std::min)(static_cast(aligned_tail_offset + data_alignment - tail_offset), remaining_tail_size); base_storage->Read(reinterpret_cast(work_buf), data_alignment, aligned_tail_offset); diff --git a/src/core/file_sys/fssystem/fssystem_crypto_configuration.cpp b/src/core/file_sys/fssystem/fssystem_crypto_configuration.cpp index a4f0cde281..0a9f28975b 100644 --- a/src/core/file_sys/fssystem/fssystem_crypto_configuration.cpp +++ b/src/core/file_sys/fssystem/fssystem_crypto_configuration.cpp @@ -29,12 +29,12 @@ void GenerateKey(void* 
dst_key, size_t dst_key_size, const void* src_key, size_t key_type == static_cast(KeyType::NcaHeaderKey2)) { const s32 key_index = static_cast(KeyType::NcaHeaderKey2) == key_type; const auto key = instance.GetKey(Core::Crypto::S256KeyType::Header); - std::memcpy(dst_key, key.data() + key_index * 0x10, std::min(dst_key_size, key.size() / 2)); + std::memcpy(dst_key, key.data() + key_index * 0x10, (std::min)(dst_key_size, key.size() / 2)); return; } const s32 key_generation = - std::max(key_type / NcaCryptoConfiguration::KeyAreaEncryptionKeyIndexCount, 1) - 1; + (std::max)(key_type / NcaCryptoConfiguration::KeyAreaEncryptionKeyIndexCount, 1) - 1; const s32 key_index = key_type % NcaCryptoConfiguration::KeyAreaEncryptionKeyIndexCount; Core::Crypto::AESCipher cipher( diff --git a/src/core/file_sys/fssystem/fssystem_integrity_verification_storage.cpp b/src/core/file_sys/fssystem/fssystem_integrity_verification_storage.cpp index 046571e9ef..57cdc19248 100644 --- a/src/core/file_sys/fssystem/fssystem_integrity_verification_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_integrity_verification_storage.cpp @@ -34,7 +34,7 @@ void IntegrityVerificationStorage::Initialize(VirtualFile hs, ASSERT(m_verification_block_size == 1ll << m_verification_block_order); // Set upper layer block sizes. 
- upper_layer_verif_block_size = std::max(upper_layer_verif_block_size, HashSize); + upper_layer_verif_block_size = (std::max)(upper_layer_verif_block_size, HashSize); m_upper_layer_verification_block_size = upper_layer_verif_block_size; m_upper_layer_verification_block_order = ILog2(static_cast(upper_layer_verif_block_size)); ASSERT(m_upper_layer_verification_block_size == 1ll << m_upper_layer_verification_block_order); diff --git a/src/core/file_sys/fssystem/fssystem_nca_header.cpp b/src/core/file_sys/fssystem/fssystem_nca_header.cpp index cef0f0bb94..2226c087c0 100644 --- a/src/core/file_sys/fssystem/fssystem_nca_header.cpp +++ b/src/core/file_sys/fssystem/fssystem_nca_header.cpp @@ -9,7 +9,7 @@ namespace FileSys { u8 NcaHeader::GetProperKeyGeneration() const { - return std::max(this->key_generation, this->key_generation_2); + return (std::max)(this->key_generation, this->key_generation_2); } bool NcaPatchInfo::HasIndirectTable() const { diff --git a/src/core/file_sys/fssystem/fssystem_pooled_buffer.cpp b/src/core/file_sys/fssystem/fssystem_pooled_buffer.cpp index bbfaab2557..dcd08dac3e 100644 --- a/src/core/file_sys/fssystem/fssystem_pooled_buffer.cpp +++ b/src/core/file_sys/fssystem/fssystem_pooled_buffer.cpp @@ -34,7 +34,7 @@ void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool la ASSERT(required_size <= GetAllocatableSizeMaxCore(large)); const size_t target_size = - std::min(std::max(ideal_size, required_size), GetAllocatableSizeMaxCore(large)); + (std::min)((std::max)(ideal_size, required_size), GetAllocatableSizeMaxCore(large)); // Dummy implementation for allocate. 
if (target_size > 0) { diff --git a/src/core/file_sys/fssystem/fssystem_sparse_storage.h b/src/core/file_sys/fssystem/fssystem_sparse_storage.h index 6c196ec611..1cc7e7b1eb 100644 --- a/src/core/file_sys/fssystem/fssystem_sparse_storage.h +++ b/src/core/file_sys/fssystem/fssystem_sparse_storage.h @@ -18,7 +18,7 @@ private: virtual ~ZeroStorage() {} virtual size_t GetSize() const override { - return std::numeric_limits::max(); + return (std::numeric_limits::max)(); } virtual size_t Read(u8* buffer, size_t size, size_t offset) const override { @@ -62,7 +62,7 @@ public: private: void SetZeroStorage() { - return this->SetStorage(1, m_zero_storage, 0, std::numeric_limits::max()); + return this->SetStorage(1, m_zero_storage, 0, (std::numeric_limits::max)()); } private: diff --git a/src/core/file_sys/nca_metadata.cpp b/src/core/file_sys/nca_metadata.cpp index 9e855c50d1..55ea4d0803 100644 --- a/src/core/file_sys/nca_metadata.cpp +++ b/src/core/file_sys/nca_metadata.cpp @@ -102,7 +102,7 @@ std::vector CNMT::Serialize() const { header.type >= TitleType::Application && header.type <= TitleType::AOC; const auto dead_zone = header.table_offset + sizeof(CNMTHeader); std::vector out( - std::max(sizeof(CNMTHeader) + (has_opt_header ? sizeof(OptionalHeader) : 0), dead_zone) + + (std::max)(sizeof(CNMTHeader) + (has_opt_header ? 
sizeof(OptionalHeader) : 0), dead_zone) + content_records.size() * sizeof(ContentRecord) + meta_records.size() * sizeof(MetaRecord)); memcpy(out.data(), &header, sizeof(CNMTHeader)); diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index 85d30543c1..cb2089e9b3 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -273,7 +273,7 @@ std::vector PlaceholderCache::List() const { NcaID PlaceholderCache::Generate() { std::random_device device; std::mt19937 gen(device()); - std::uniform_int_distribution distribution(1, std::numeric_limits::max()); + std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); NcaID out{}; diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp index a2b2809734..fee75f9de6 100644 --- a/src/core/file_sys/romfs.cpp +++ b/src/core/file_sys/romfs.cpp @@ -75,7 +75,7 @@ std::pair GetEntry(const RomFSTraversalContext& ctx, siz } std::memcpy(&entry, data + offset, sizeof(EntryType)); - const size_t name_length = std::min(entry_end + entry.name_length, size) - entry_end; + const size_t name_length = (std::min)(entry_end + entry.name_length, size) - entry_end; std::string name(reinterpret_cast(data + entry_end), name_length); return {entry, std::move(name)}; diff --git a/src/core/file_sys/vfs/vfs.cpp b/src/core/file_sys/vfs/vfs.cpp index a04292760f..2be7084209 100644 --- a/src/core/file_sys/vfs/vfs.cpp +++ b/src/core/file_sys/vfs/vfs.cpp @@ -507,9 +507,9 @@ bool VfsRawCopy(const VirtualFile& src, const VirtualFile& dest, std::size_t blo if (!dest->Resize(src->GetSize())) return false; - std::vector temp(std::min(block_size, src->GetSize())); + std::vector temp((std::min)(block_size, src->GetSize())); for (std::size_t i = 0; i < src->GetSize(); i += block_size) { - const auto read = std::min(block_size, src->GetSize() - i); + const auto read = (std::min)(block_size, src->GetSize() - i); if (src->Read(temp.data(), read, i) != read) { 
return false; diff --git a/src/core/file_sys/vfs/vfs_static.h b/src/core/file_sys/vfs/vfs_static.h index bb53560ac7..6dc4ef8fbf 100644 --- a/src/core/file_sys/vfs/vfs_static.h +++ b/src/core/file_sys/vfs/vfs_static.h @@ -43,7 +43,7 @@ public: } std::size_t Read(u8* data, std::size_t length, std::size_t offset) const override { - const auto read = std::min(length, size - offset); + const auto read = (std::min)(length, size - offset); std::fill(data, data + read, value); return read; } @@ -61,7 +61,7 @@ public: } std::vector ReadBytes(std::size_t length, std::size_t offset) const override { - const auto read = std::min(length, size - offset); + const auto read = (std::min)(length, size - offset); return std::vector(read, value); } diff --git a/src/core/file_sys/vfs/vfs_vector.cpp b/src/core/file_sys/vfs/vfs_vector.cpp index 0d54461c8f..7576a023cf 100644 --- a/src/core/file_sys/vfs/vfs_vector.cpp +++ b/src/core/file_sys/vfs/vfs_vector.cpp @@ -37,7 +37,7 @@ bool VectorVfsFile::IsReadable() const { } std::size_t VectorVfsFile::Read(u8* data_, std::size_t length, std::size_t offset) const { - const auto read = std::min(length, data.size() - offset); + const auto read = (std::min)(length, data.size() - offset); std::memcpy(data_, data.data() + offset, read); return read; } @@ -45,7 +45,7 @@ std::size_t VectorVfsFile::Read(u8* data_, std::size_t length, std::size_t offse std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_t offset) { if (offset + length > data.size()) data.resize(offset + length); - const auto write = std::min(length, data.size() - offset); + const auto write = (std::min)(length, data.size() - offset); std::memcpy(data.data() + offset, data_, write); return write; } diff --git a/src/core/file_sys/vfs/vfs_vector.h b/src/core/file_sys/vfs/vfs_vector.h index 587187dd26..27f2c03ca7 100644 --- a/src/core/file_sys/vfs/vfs_vector.h +++ b/src/core/file_sys/vfs/vfs_vector.h @@ -45,7 +45,7 @@ public: } std::size_t Read(u8* data_, 
std::size_t length, std::size_t offset) const override { - const auto read = std::min(length, size - offset); + const auto read = (std::min)(length, size - offset); std::memcpy(data_, data.data() + offset, read); return read; } diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index d1f1ca8c97..ec5cec8fa0 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -28,11 +28,11 @@ std::pair EmuWindow::MapToTouchScreen(u32 framebuffer_x, u32 framebuff } std::pair EmuWindow::ClipToTouchScreen(u32 new_x, u32 new_y) const { - new_x = std::max(new_x, framebuffer_layout.screen.left); - new_x = std::min(new_x, framebuffer_layout.screen.right - 1); + new_x = (std::max)(new_x, framebuffer_layout.screen.left); + new_x = (std::min)(new_x, framebuffer_layout.screen.right - 1); - new_y = std::max(new_y, framebuffer_layout.screen.top); - new_y = std::min(new_y, framebuffer_layout.screen.bottom - 1); + new_y = (std::max)(new_y, framebuffer_layout.screen.top); + new_y = (std::min)(new_y, framebuffer_layout.screen.bottom - 1); return std::make_pair(new_x, new_y); } diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index 2590b20da4..3de975c20f 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -14,7 +14,7 @@ namespace Layout { template static Common::Rectangle MaxRectangle(Common::Rectangle window_area, float screen_aspect_ratio) { - const float scale = std::min(static_cast(window_area.GetWidth()), + const float scale = (std::min)(static_cast(window_area.GetWidth()), static_cast(window_area.GetHeight()) / screen_aspect_ratio); return Common::Rectangle{0, 0, static_cast(std::round(scale)), static_cast(std::round(scale * screen_aspect_ratio))}; diff --git a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.cpp b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.cpp index 24eb3f8866..fa918ff204 100644 --- 
a/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_memory_layout.cpp @@ -133,7 +133,7 @@ void SetupPoolPartitionMemoryRegions(KMemoryLayout& memory_layout) { // Decide on starting addresses for our pools. const u64 application_pool_start = pool_end - application_pool_size; const u64 applet_pool_start = application_pool_start - applet_pool_size; - const u64 unsafe_system_pool_start = std::min( + const u64 unsafe_system_pool_start = (std::min)( kernel_dram_start + CarveoutSizeMax, Common::AlignDown(applet_pool_start - unsafe_system_pool_min_size, CarveoutAlignment)); const size_t unsafe_system_pool_size = applet_pool_start - unsafe_system_pool_start; diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index f62f3e4767..db654d730d 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp @@ -182,13 +182,13 @@ namespace { template u64 GenerateUniformRange(u64 min, u64 max, F f) { // Handle the case where the difference is too large to represent. - if (max == std::numeric_limits::max() && min == std::numeric_limits::min()) { + if (max == (std::numeric_limits::max)() && min == (std::numeric_limits::min)()) { return f(); } // Iterate until we get a value in range. 
const u64 range_size = ((max + 1) - min); - const u64 effective_max = (std::numeric_limits::max() / range_size) * range_size; + const u64 effective_max = ((std::numeric_limits::max)() / range_size) * range_size; while (true) { if (const u64 rnd = f(); rnd < effective_max) { return min + (rnd % range_size); @@ -201,7 +201,7 @@ u64 GenerateUniformRange(u64 min, u64 max, F f) { u64 KSystemControl::GenerateRandomU64() { std::random_device device; std::mt19937 gen(device()); - std::uniform_int_distribution distribution(1, std::numeric_limits::max()); + std::uniform_int_distribution distribution(1, (std::numeric_limits::max)()); return distribution(gen); } diff --git a/src/core/hle/kernel/k_dynamic_page_manager.h b/src/core/hle/kernel/k_dynamic_page_manager.h index ad11e84b71..2357fe0f4d 100644 --- a/src/core/hle/kernel/k_dynamic_page_manager.h +++ b/src/core/hle/kernel/k_dynamic_page_manager.h @@ -110,7 +110,7 @@ public: // Update our tracking. m_page_bitmap.ClearBit(offset); - m_peak = std::max(m_peak, (++m_used)); + m_peak = (std::max)(m_peak, (++m_used)); return GetPointer(m_aligned_address) + offset; } @@ -131,7 +131,7 @@ public: // Update our tracking. 
m_page_bitmap.ClearRange(offset, count); m_used += count; - m_peak = std::max(m_peak, m_used); + m_peak = (std::max)(m_peak, m_used); return GetPointer(m_aligned_address) + offset; } diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 1bf68e6b04..22fdc7e47a 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -179,7 +179,7 @@ private: m_free_head_index = m_entry_infos[index].GetNextFreeIndex(); - m_max_count = std::max(m_max_count, ++m_count); + m_max_count = (std::max)(m_max_count, ++m_count); return index; } diff --git a/src/core/hle/kernel/k_hardware_timer.cpp b/src/core/hle/kernel/k_hardware_timer.cpp index 4e947dd6bc..f3098a59e0 100644 --- a/src/core/hle/kernel/k_hardware_timer.cpp +++ b/src/core/hle/kernel/k_hardware_timer.cpp @@ -19,7 +19,7 @@ void KHardwareTimer::Initialize() { void KHardwareTimer::Finalize() { m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type); - m_wakeup_time = std::numeric_limits::max(); + m_wakeup_time = (std::numeric_limits::max)(); m_event_type.reset(); } @@ -37,7 +37,7 @@ void KHardwareTimer::DoTask() { // Disable the timer interrupt while we handle this. // Not necessary due to core timing already having popped this event to call it. 
// this->DisableInterrupt(); - m_wakeup_time = std::numeric_limits::max(); + m_wakeup_time = (std::numeric_limits::max)(); if (const s64 next_time = this->DoInterruptTaskImpl(GetTick()); 0 < next_time && next_time <= m_wakeup_time) { @@ -63,7 +63,7 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) { void KHardwareTimer::DisableInterrupt() { m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, Core::Timing::UnscheduleEventType::NoWait); - m_wakeup_time = std::numeric_limits::max(); + m_wakeup_time = (std::numeric_limits::max)(); } s64 KHardwareTimer::GetTick() const { @@ -71,7 +71,7 @@ s64 KHardwareTimer::GetTick() const { } bool KHardwareTimer::GetInterruptEnabled() { - return m_wakeup_time != std::numeric_limits::max(); + return m_wakeup_time != (std::numeric_limits::max)(); } } // namespace Kernel diff --git a/src/core/hle/kernel/k_hardware_timer.h b/src/core/hle/kernel/k_hardware_timer.h index 27f43cd194..cb83e9c5b5 100644 --- a/src/core/hle/kernel/k_hardware_timer.h +++ b/src/core/hle/kernel/k_hardware_timer.h @@ -40,7 +40,7 @@ private: private: // Absolute time in nanoseconds - s64 m_wakeup_time{std::numeric_limits::max()}; + s64 m_wakeup_time{(std::numeric_limits::max)()}; std::shared_ptr m_event_type{}; }; diff --git a/src/core/hle/kernel/k_light_server_session.cpp b/src/core/hle/kernel/k_light_server_session.cpp index e5ceb01f2a..5ea448b998 100644 --- a/src/core/hle/kernel/k_light_server_session.cpp +++ b/src/core/hle/kernel/k_light_server_session.cpp @@ -11,7 +11,7 @@ namespace Kernel { namespace { -constexpr u64 InvalidThreadId = std::numeric_limits::max(); +constexpr u64 InvalidThreadId = (std::numeric_limits::max)(); class ThreadQueueImplForKLightServerSessionRequest final : public KThreadQueue { private: diff --git a/src/core/hle/kernel/k_light_server_session.h b/src/core/hle/kernel/k_light_server_session.h index 8eca3eab69..87ec9db016 100644 --- a/src/core/hle/kernel/k_light_server_session.h +++ 
b/src/core/hle/kernel/k_light_server_session.h @@ -19,7 +19,7 @@ private: KLightSession* m_parent{}; KThread::WaiterList m_request_list{}; KThread* m_current_request{}; - u64 m_server_thread_id{std::numeric_limits::max()}; + u64 m_server_thread_id{(std::numeric_limits::max)()}; KThread* m_server_thread{}; public: diff --git a/src/core/hle/kernel/k_memory_block.h b/src/core/hle/kernel/k_memory_block.h index d2b7e9a66e..acf48cb757 100644 --- a/src/core/hle/kernel/k_memory_block.h +++ b/src/core/hle/kernel/k_memory_block.h @@ -551,7 +551,7 @@ public: } m_device_disable_merge_left_count = - std::min(m_device_disable_merge_left_count, m_device_use_count); + (std::min)(m_device_disable_merge_left_count, m_device_use_count); if (m_device_disable_merge_left_count == 0) { m_disable_merge_attribute = static_cast( diff --git a/src/core/hle/kernel/k_memory_layout.cpp b/src/core/hle/kernel/k_memory_layout.cpp index bec7146688..6821f4c07e 100644 --- a/src/core/hle/kernel/k_memory_layout.cpp +++ b/src/core/hle/kernel/k_memory_layout.cpp @@ -66,7 +66,7 @@ bool KMemoryRegionTree::Insert(u64 address, size_t size, u32 type_id, u32 new_at this->insert(*found); // Insert a new region for the split. - const u64 new_pair = (old_pair != std::numeric_limits::max()) + const u64 new_pair = (old_pair != (std::numeric_limits::max)()) ? old_pair + (address - old_address) : old_pair; this->insert(*AllocateRegion(m_memory_region_allocator, address, inserted_region_last, @@ -75,7 +75,7 @@ bool KMemoryRegionTree::Insert(u64 address, size_t size, u32 type_id, u32 new_at // If we need to insert a region after the region, do so. if (old_last != inserted_region_last) { - const u64 after_pair = (old_pair != std::numeric_limits::max()) + const u64 after_pair = (old_pair != (std::numeric_limits::max)()) ? 
old_pair + (inserted_region_end - old_address) : old_pair; this->insert(*AllocateRegion(m_memory_region_allocator, inserted_region_end, old_last, diff --git a/src/core/hle/kernel/k_memory_manager.cpp b/src/core/hle/kernel/k_memory_manager.cpp index d6bd272962..2aa393ac06 100644 --- a/src/core/hle/kernel/k_memory_manager.cpp +++ b/src/core/hle/kernel/k_memory_manager.cpp @@ -323,7 +323,7 @@ Result KMemoryManager::AllocateAndOpen(KPageGroup* out, size_t num_pages, u32 op // Process part or all of the block. const size_t cur_pages = - std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); + (std::min)(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); manager.OpenFirst(cur_address, cur_pages); // Advance. @@ -385,7 +385,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Process part or all of the block. const size_t cur_pages = - std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); + (std::min)(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); any_new = manager.ProcessOptimizedAllocation(m_system.Kernel(), cur_address, cur_pages, fill_pattern); @@ -409,7 +409,7 @@ Result KMemoryManager::AllocateForProcess(KPageGroup* out, size_t num_pages, u32 // Track some or all of the current pages. const size_t cur_pages = - std::min(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); + (std::min)(remaining_pages, manager.GetPageOffsetToEnd(cur_address)); manager.TrackOptimizedAllocation(m_system.Kernel(), cur_address, cur_pages); // Advance. diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index c5a487af92..41d33fa55d 100644 --- a/src/core/hle/kernel/k_memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h @@ -68,7 +68,7 @@ public: // Repeatedly open references until we've done so for all pages. 
while (num_pages) { auto& manager = this->GetManager(address); - const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address)); + const size_t cur_pages = (std::min)(num_pages, manager.GetPageOffsetToEnd(address)); { KScopedLightLock lk(m_pool_locks[static_cast(manager.GetPool())]); @@ -84,7 +84,7 @@ public: // Repeatedly open references until we've done so for all pages. while (num_pages) { auto& manager = this->GetManager(address); - const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address)); + const size_t cur_pages = (std::min)(num_pages, manager.GetPageOffsetToEnd(address)); { KScopedLightLock lk(m_pool_locks[static_cast(manager.GetPool())]); @@ -100,7 +100,7 @@ public: // Repeatedly close references until we've done so for all pages. while (num_pages) { auto& manager = this->GetManager(address); - const size_t cur_pages = std::min(num_pages, manager.GetPageOffsetToEnd(address)); + const size_t cur_pages = (std::min)(num_pages, manager.GetPageOffsetToEnd(address)); { KScopedLightLock lk(m_pool_locks[static_cast(manager.GetPool())]); diff --git a/src/core/hle/kernel/k_memory_region.h b/src/core/hle/kernel/k_memory_region.h index e3044f0227..cad7b31126 100644 --- a/src/core/hle/kernel/k_memory_region.h +++ b/src/core/hle/kernel/k_memory_region.h @@ -28,7 +28,7 @@ public: : m_address(address), m_last_address(last_address), m_pair_address(pair_address), m_attributes(attributes), m_type_id(type_id) {} constexpr KMemoryRegion(u64 address, u64 last_address, u32 attributes, u32 type_id) - : KMemoryRegion(address, last_address, std::numeric_limits::max(), attributes, + : KMemoryRegion(address, last_address, (std::numeric_limits::max)(), attributes, type_id) {} ~KMemoryRegion() = default; diff --git a/src/core/hle/kernel/k_page_bitmap.h b/src/core/hle/kernel/k_page_bitmap.h index 0ff9877326..4ad5483b28 100644 --- a/src/core/hle/kernel/k_page_bitmap.h +++ b/src/core/hle/kernel/k_page_bitmap.h @@ -83,7 +83,7 @@ public: } // 
Determine how many bits to take this round. - const auto cur_bits = std::min(num_bits, m_bits_available); + const auto cur_bits = (std::min)(num_bits, m_bits_available); // Generate mask for our current bits. const u64 mask = (static_cast(1) << cur_bits) - 1; diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h index c55225bac6..0d63a6b1f5 100644 --- a/src/core/hle/kernel/k_page_heap.h +++ b/src/core/hle/kernel/k_page_heap.h @@ -75,7 +75,7 @@ public: } static constexpr s32 GetAlignedBlockIndex(size_t num_pages, size_t align_pages) { - const size_t target_pages = std::max(num_pages, align_pages); + const size_t target_pages = (std::max)(num_pages, align_pages); for (size_t i = 0; i < NumMemoryBlockPageShifts; i++) { if (target_pages <= (static_cast(1) << MemoryBlockPageShifts[i]) / PageSize) { return static_cast(i); diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp index 5e39fbeb14..6b3f60f52e 100644 --- a/src/core/hle/kernel/k_page_table_base.cpp +++ b/src/core/hle/kernel/k_page_table_base.cpp @@ -1731,7 +1731,7 @@ void KPageTableBase::RemapPageGroup(PageLinkedList* page_list, KProcessAddress a } // Map whatever we can. - const size_t cur_pages = std::min(pg_pages, map_pages); + const size_t cur_pages = (std::min)(pg_pages, map_pages); R_ASSERT(this->Operate(page_list, map_address, map_pages, pg_phys_addr, true, map_properties, OperationType::Map, true)); @@ -1929,7 +1929,7 @@ Result KPageTableBase::GetContiguousMemoryRangeWithState( } // Take the minimum size for our region. - size = std::min(size, contig_size); + size = (std::min)(size, contig_size); // Check that the memory is contiguous (modulo the reference count bit). 
const KMemoryState test_state_mask = state_mask | KMemoryState::FlagReferenceCounted; @@ -5297,7 +5297,7 @@ Result KPageTableBase::MapPhysicalMemory(KProcessAddress address, size_t size) { KMemoryPermission::None, false, false, DisableMergeAttribute::None}; const size_t cur_pages = - std::min(KProcessAddress(info.GetEndAddress()) - cur_address, + (std::min)(KProcessAddress(info.GetEndAddress()) - cur_address, last_unmap_address + 1 - cur_address) / PageSize; @@ -5345,7 +5345,7 @@ Result KPageTableBase::MapPhysicalMemory(KProcessAddress address, size_t size) { ? DisableMergeAttribute::DisableHead : DisableMergeAttribute::None}; size_t map_pages = - std::min(KProcessAddress(info.GetEndAddress()) - cur_address, + (std::min)(KProcessAddress(info.GetEndAddress()) - cur_address, last_address + 1 - cur_address) / PageSize; @@ -5373,7 +5373,7 @@ Result KPageTableBase::MapPhysicalMemory(KProcessAddress address, size_t size) { } // Add whatever we can to the current block. - const size_t cur_pages = std::min(pg_pages, remain_pages); + const size_t cur_pages = (std::min)(pg_pages, remain_pages); R_TRY(cur_pg.AddBlock(pg_phys_addr + ((pg_pages - cur_pages) * PageSize), cur_pages)); @@ -5535,7 +5535,7 @@ Result KPageTableBase::UnmapPhysicalMemory(KProcessAddress address, size_t size) // Determine the range to unmap. const KPageProperties unmap_properties = {KMemoryPermission::None, false, false, DisableMergeAttribute::None}; - const size_t cur_pages = std::min(KProcessAddress(info.GetEndAddress()) - cur_address, + const size_t cur_pages = (std::min)(KProcessAddress(info.GetEndAddress()) - cur_address, last_address + 1 - cur_address) / PageSize; @@ -5656,7 +5656,7 @@ Result KPageTableBase::UnmapProcessMemory(KProcessAddress dst_address, size_t si } // Update our current size. 
- m_cur_size = std::min(m_remaining_size, m_cur_size + m_entry.block_size); + m_cur_size = (std::min)(m_remaining_size, m_cur_size + m_entry.block_size); } } }; diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index df3e540dc2..d6742f0637 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -59,7 +59,7 @@ public: static constexpr u64 InitialProcessIdMax = 0x50; static constexpr u64 ProcessIdMin = InitialProcessIdMax + 1; - static constexpr u64 ProcessIdMax = std::numeric_limits::max(); + static constexpr u64 ProcessIdMax = (std::numeric_limits::max)(); private: using SharedMemoryInfoList = Common::IntrusiveListBaseTraits::ListType; diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp index d8a63aaf8c..1403317d72 100644 --- a/src/core/hle/kernel/k_resource_limit.cpp +++ b/src/core/hle/kernel/k_resource_limit.cpp @@ -111,7 +111,7 @@ bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) { if (m_current_values[index] + value <= m_limit_values[index]) { m_current_values[index] += value; m_current_hints[index] += value; - m_peak_values[index] = std::max(m_peak_values[index], m_current_values[index]); + m_peak_values[index] = (std::max)(m_peak_values[index], m_current_values[index]); return true; } diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h index 334afebb71..2ec3d185dc 100644 --- a/src/core/hle/kernel/k_slab_heap.h +++ b/src/core/hle/kernel/k_slab_heap.h @@ -149,7 +149,7 @@ public: size_t GetObjectIndex(const void* obj) const { if constexpr (SupportDynamicExpansion) { if (!this->Contains(reinterpret_cast(obj))) { - return std::numeric_limits::max(); + return (std::numeric_limits::max)(); } } diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 8a360a839b..6aef191c87 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -1016,7 +1016,7 @@ 
void KThread::RestorePriority(KernelCore& kernel, KThread* thread) { s32 new_priority = thread->GetBasePriority(); for (const auto& held_lock : thread->m_held_lock_info_list) { new_priority = - std::min(new_priority, held_lock.GetHighestPriorityWaiter()->GetPriority()); + (std::min)(new_priority, held_lock.GetHighestPriorityWaiter()->GetPriority()); } // If the priority we would inherit is not different from ours, don't do anything. diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 00177dc943..0ff81066e9 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -507,7 +507,7 @@ struct KernelCore::Impl { constexpr size_t MiscRegionAlign = KernelAslrAlignment; constexpr size_t MiscRegionMinimumSize = 32_MiB; const size_t misc_region_size = Common::AlignUp( - std::max(misc_region_needed_size, MiscRegionMinimumSize), MiscRegionAlign); + (std::max)(misc_region_needed_size, MiscRegionMinimumSize), MiscRegionAlign); ASSERT(misc_region_size > 0); // Setup the misc region. 
diff --git a/src/core/hle/kernel/svc/svc_address_arbiter.cpp b/src/core/hle/kernel/svc/svc_address_arbiter.cpp index ab91d74433..688d6abce0 100644 --- a/src/core/hle/kernel/svc/svc_address_arbiter.cpp +++ b/src/core/hle/kernel/svc/svc_address_arbiter.cpp @@ -58,10 +58,10 @@ Result WaitForAddress(Core::System& system, u64 address, ArbitrationType arb_typ if (offset_tick > 0) { timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { timeout = timeout_ns; diff --git a/src/core/hle/kernel/svc/svc_condition_variable.cpp b/src/core/hle/kernel/svc/svc_condition_variable.cpp index 0f4550a795..2aed6a77be 100644 --- a/src/core/hle/kernel/svc/svc_condition_variable.cpp +++ b/src/core/hle/kernel/svc/svc_condition_variable.cpp @@ -31,10 +31,10 @@ Result WaitProcessWideKeyAtomic(Core::System& system, u64 address, u64 cv_key, u if (offset_tick > 0) { timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { timeout = timeout_ns; diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index b619bd70ab..bc0684e76c 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -61,10 +61,10 @@ Result ReplyAndReceiveImpl(KernelCore& kernel, int32_t* out_index, uintptr_t mes if (offset_tick > 0) { timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { timeout = timeout_ns; diff --git 
a/src/core/hle/kernel/svc/svc_process.cpp b/src/core/hle/kernel/svc/svc_process.cpp index 87845d64a6..6d63892a94 100644 --- a/src/core/hle/kernel/svc/svc_process.cpp +++ b/src/core/hle/kernel/svc/svc_process.cpp @@ -82,7 +82,7 @@ Result GetProcessList(Core::System& system, s32* out_num_processes, u64 out_proc const auto num_processes = process_list.size(); const auto copy_amount = - std::min(static_cast(out_process_ids_size), num_processes); + (std::min)(static_cast(out_process_ids_size), num_processes); for (std::size_t i = 0; i < copy_amount && it != process_list.end(); ++i, ++it) { memory.Write64(out_process_ids, (*it)->GetProcessId()); diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp index 77cd634c0d..ca5ce6fe07 100644 --- a/src/core/hle/kernel/svc/svc_thread.cpp +++ b/src/core/hle/kernel/svc/svc_thread.cpp @@ -117,10 +117,10 @@ void SleepThread(Core::System& system, s64 ns) { if (offset_tick > 0) { timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } } else { - timeout = std::numeric_limits::max(); + timeout = (std::numeric_limits::max)(); } // Sleep. 
@@ -226,7 +226,7 @@ Result GetThreadList(Core::System& system, s32* out_num_threads, u64 out_thread_ auto& memory = GetCurrentMemory(system.Kernel()); const auto& thread_list = current_process->GetThreadList(); const auto num_threads = thread_list.size(); - const auto copy_amount = std::min(static_cast(out_thread_ids_size), num_threads); + const auto copy_amount = (std::min)(static_cast(out_thread_ids_size), num_threads); auto list_iter = thread_list.cbegin(); for (std::size_t i = 0; i < copy_amount; ++i, ++list_iter) { diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 51a542e5e6..73f54f89b2 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -72,7 +72,7 @@ static void SanitizeJPEGImageSize(std::vector& image) { } } - image.resize(std::min(image.size(), max_jpeg_image_size)); + image.resize((std::min)(image.size(), max_jpeg_image_size)); } class IManagerForSystemService final : public ServiceFramework { diff --git a/src/core/hle/service/am/frontend/applet_cabinet.cpp b/src/core/hle/service/am/frontend/applet_cabinet.cpp index 4cbc80d639..58401479d3 100644 --- a/src/core/hle/service/am/frontend/applet_cabinet.cpp +++ b/src/core/hle/service/am/frontend/applet_cabinet.cpp @@ -118,7 +118,7 @@ void Cabinet::DisplayCompleted(bool apply_changes, std::string_view amiibo_name) case Service::NFP::CabinetMode::StartNicknameAndOwnerSettings: { Service::NFP::RegisterInfoPrivate register_info{}; std::memcpy(register_info.amiibo_name.data(), amiibo_name.data(), - std::min(amiibo_name.size(), register_info.amiibo_name.size() - 1)); + (std::min)(amiibo_name.size(), register_info.amiibo_name.size() - 1)); register_info.mii_store_data.BuildRandom(Mii::Age::All, Mii::Gender::All, Mii::Race::All); register_info.mii_store_data.SetNickname({u'y', u'u', u'z', u'u'}); nfp_device->SetRegisterInfoPrivate(register_info); diff --git a/src/core/hle/service/am/frontend/applet_controller.cpp 
b/src/core/hle/service/am/frontend/applet_controller.cpp index 66f52686d7..d457885773 100644 --- a/src/core/hle/service/am/frontend/applet_controller.cpp +++ b/src/core/hle/service/am/frontend/applet_controller.cpp @@ -31,7 +31,7 @@ static Core::Frontend::ControllerParameters ConvertToFrontendParameters( npad_style_set.raw = private_arg.style_set; return { - .min_players = std::max(s8{1}, header.player_count_min), + .min_players = (std::max)(s8{1}, header.player_count_min), .max_players = header.player_count_max, .keep_controllers_connected = header.enable_take_over_connection, .enable_single_mode = header.enable_single_mode, diff --git a/src/core/hle/service/am/service/application_accessor.cpp b/src/core/hle/service/am/service/application_accessor.cpp index 986abc716a..2ac07f838e 100644 --- a/src/core/hle/service/am/service/application_accessor.cpp +++ b/src/core/hle/service/am/service/application_accessor.cpp @@ -115,7 +115,7 @@ Result IApplicationAccessor::GetApplicationControlProperty( R_TRY(system.GetARPManager().GetControlProperty(&nacp, m_applet->program_id)); std::memcpy(out_control_property.data(), nacp.data(), - std::min(out_control_property.size(), nacp.size())); + (std::min)(out_control_property.size(), nacp.size())); R_SUCCEED(); } diff --git a/src/core/hle/service/am/service/application_functions.cpp b/src/core/hle/service/am/service/application_functions.cpp index b736e2821b..eacc345e15 100644 --- a/src/core/hle/service/am/service/application_functions.cpp +++ b/src/core/hle/service/am/service/application_functions.cpp @@ -216,7 +216,7 @@ Result IApplicationFunctions::GetDisplayVersion(Out out_display_ if (res.first != nullptr) { const auto& version = res.first->GetVersionString(); std::memcpy(out_display_version->string.data(), version.data(), - std::min(version.size(), out_display_version->string.size())); + (std::min)(version.size(), out_display_version->string.size())); } else { static constexpr char default_version[]{"1.0.0"}; 
std::memcpy(out_display_version->string.data(), default_version, sizeof(default_version)); @@ -284,7 +284,7 @@ Result IApplicationFunctions::GetCacheStorageMax(Out out_cache_storage_inde R_TRY(system.GetARPManager().GetControlProperty(&nacp, m_applet->program_id)); auto raw_nacp = std::make_unique(); - std::memcpy(raw_nacp.get(), nacp.data(), std::min(sizeof(*raw_nacp), nacp.size())); + std::memcpy(raw_nacp.get(), nacp.data(), (std::min)(sizeof(*raw_nacp), nacp.size())); *out_cache_storage_index_max = static_cast(raw_nacp->cache_storage_max_index); *out_max_journal_size = static_cast(raw_nacp->cache_storage_data_and_journal_max_size); diff --git a/src/core/hle/service/am/service/library_applet_self_accessor.cpp b/src/core/hle/service/am/service/library_applet_self_accessor.cpp index cbe45189f8..091aadc9fc 100644 --- a/src/core/hle/service/am/service/library_applet_self_accessor.cpp +++ b/src/core/hle/service/am/service/library_applet_self_accessor.cpp @@ -162,7 +162,7 @@ Result ILibraryAppletSelfAccessor::GetMainAppletApplicationControlProperty( system.GetARPManager().GetControlProperty(&nacp, application.application_id); if (R_SUCCEEDED(result)) { - std::memcpy(out_nacp->data(), nacp.data(), std::min(nacp.size(), out_nacp->size())); + std::memcpy(out_nacp->data(), nacp.data(), (std::min)(nacp.size(), out_nacp->size())); } R_RETURN(result); diff --git a/src/core/hle/service/bcat/bcat_service.cpp b/src/core/hle/service/bcat/bcat_service.cpp index 63b1072d2a..5c23760113 100644 --- a/src/core/hle/service/bcat/bcat_service.cpp +++ b/src/core/hle/service/bcat/bcat_service.cpp @@ -102,7 +102,7 @@ Result IBcatService::SetPassphrase(u64 application_id, Passphrase passphrase{}; std::memcpy(passphrase.data(), passphrase_buffer.data(), - std::min(passphrase.size(), passphrase_buffer.size())); + (std::min)(passphrase.size(), passphrase_buffer.size())); backend.SetPassphrase(application_id, passphrase); R_SUCCEED(); diff --git 
a/src/core/hle/service/bcat/delivery_cache_directory_service.cpp b/src/core/hle/service/bcat/delivery_cache_directory_service.cpp index 01f08a2fc5..fea373a607 100644 --- a/src/core/hle/service/bcat/delivery_cache_directory_service.cpp +++ b/src/core/hle/service/bcat/delivery_cache_directory_service.cpp @@ -57,12 +57,12 @@ Result IDeliveryCacheDirectoryService::Read( R_UNLESS(current_dir != nullptr, ResultNoOpenEntry); const auto files = current_dir->GetFiles(); - *out_count = static_cast(std::min(files.size(), out_buffer.size())); + *out_count = static_cast((std::min)(files.size(), out_buffer.size())); std::transform(files.begin(), files.begin() + *out_count, out_buffer.begin(), [](const auto& file) { FileName name{}; std::memcpy(name.data(), file->GetName().data(), - std::min(file->GetName().size(), name.size())); + (std::min)(file->GetName().size(), name.size())); return DeliveryCacheDirectoryEntry{name, file->GetSize(), DigestFile(file)}; }); R_SUCCEED(); diff --git a/src/core/hle/service/bcat/delivery_cache_storage_service.cpp b/src/core/hle/service/bcat/delivery_cache_storage_service.cpp index 4c79d71f41..0ce798eb75 100644 --- a/src/core/hle/service/bcat/delivery_cache_storage_service.cpp +++ b/src/core/hle/service/bcat/delivery_cache_storage_service.cpp @@ -47,7 +47,7 @@ Result IDeliveryCacheStorageService::EnumerateDeliveryCacheDirectory( LOG_DEBUG(Service_BCAT, "called, size={:016X}", out_directories.size()); *out_directory_count = - static_cast(std::min(out_directories.size(), entries.size() - next_read_index)); + static_cast((std::min)(out_directories.size(), entries.size() - next_read_index)); memcpy(out_directories.data(), entries.data() + next_read_index, *out_directory_count * sizeof(DirectoryName)); next_read_index += *out_directory_count; diff --git a/src/core/hle/service/cmif_serialization.h b/src/core/hle/service/cmif_serialization.h index 5a5f610f34..03b2a130a1 100644 --- a/src/core/hle/service/cmif_serialization.h +++ 
b/src/core/hle/service/cmif_serialization.h @@ -304,7 +304,7 @@ void ReadInArgument(bool is_domain, CallArguments& args, const u8* raw_data, HLE buffer = ctx.ReadBufferX(InBufferIndex); } - std::memcpy(&std::get(args), buffer.data(), std::min(BufferSize, buffer.size())); + std::memcpy(&std::get(args), buffer.data(), (std::min)(BufferSize, buffer.size())); return ReadInArgument(is_domain, args, raw_data, ctx, temp); } else if constexpr (ArgumentTraits::Type == ArgumentType::InBuffer) { diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index 9eaae4c4bd..9ad8d0e9b5 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -203,7 +203,7 @@ private: std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), [](const auto& pair) { return pair.first; }); - out_entries = std::min(ids.size(), out_entries); + out_entries = (std::min)(ids.size(), out_entries); ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); IPC::ResponseBuilder rb{ctx, 3}; @@ -225,7 +225,7 @@ private: std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids), [](const auto& pair) { return pair.first; }); - out_entries = std::min(ids.size(), out_entries); + out_entries = (std::min)(ids.size(), out_entries); ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128)); IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/filesystem/fsp/fs_i_save_data_info_reader.cpp b/src/core/hle/service/filesystem/fsp/fs_i_save_data_info_reader.cpp index ff823586b3..490ac49d42 100644 --- a/src/core/hle/service/filesystem/fsp/fs_i_save_data_info_reader.cpp +++ b/src/core/hle/service/filesystem/fsp/fs_i_save_data_info_reader.cpp @@ -44,7 +44,7 @@ Result ISaveDataInfoReader::ReadSaveDataInfo( const u64 count_entries = out_entries.size(); // Cap at total number of entries. 
- const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index); + const u64 actual_entries = (std::min)(count_entries, info.size() - next_entry_index); // Determine data start and end const auto* begin = reinterpret_cast(info.data() + next_entry_index); diff --git a/src/core/hle/service/glue/notif.cpp b/src/core/hle/service/glue/notif.cpp index 5a03d34c12..dd3f1954de 100644 --- a/src/core/hle/service/glue/notif.cpp +++ b/src/core/hle/service/glue/notif.cpp @@ -67,7 +67,7 @@ Result NotificationServiceImpl::ListAlarmSettings(s32* out_count, std::span out_alarms) { LOG_INFO(Service_NOTIF, "called, alarm_count={}", alarms.size()); - const auto count = std::min(out_alarms.size(), alarms.size()); + const auto count = (std::min)(out_alarms.size(), alarms.size()); for (size_t i = 0; i < count; i++) { out_alarms[i] = alarms[i]; } @@ -90,7 +90,7 @@ Result NotificationServiceImpl::LoadApplicationParameter(u32* out_size, LOG_WARNING(Service_NOTIF, "(STUBBED) called, alarm_setting_id={}", alarm_setting_id); std::memcpy(out_application_parameter.data(), application_parameter.data(), - std::min(sizeof(application_parameter), out_application_parameter.size())); + (std::min)(sizeof(application_parameter), out_application_parameter.size())); *out_size = static_cast(application_parameter.size()); R_SUCCEED(); diff --git a/src/core/hle/service/glue/time/manager.cpp b/src/core/hle/service/glue/time/manager.cpp index 77bf8896cd..bfe57999c8 100644 --- a/src/core/hle/service/glue/time/manager.cpp +++ b/src/core/hle/service/glue/time/manager.cpp @@ -29,7 +29,7 @@ static s64 CalendarTimeToEpoch(Service::PSC::Time::CalendarTime calendar) { }; s16 month_s16{calendar.month}; - s8 month{static_cast(((month_s16 * 43) & ~std::numeric_limits::max()) + + s8 month{static_cast(((month_s16 * 43) & ~(std::numeric_limits::max)()) + ((month_s16 * 43) >> 9))}; s8 month_index{static_cast(calendar.month - 12 * month)}; if (month_index == 0) { @@ -71,13 +71,13 @@ static 
Service::PSC::Time::LocationName GetTimeZoneString( Service::PSC::Time::LocationName configured_name{}; std::memcpy(configured_name.data(), configured_zone.data(), - std::min(configured_name.size(), configured_zone.size())); + (std::min)(configured_name.size(), configured_zone.size())); if (!time_zone_binary.IsValid(configured_name)) { configured_zone = Common::TimeZone::FindSystemTimeZone(); configured_name = {}; std::memcpy(configured_name.data(), configured_zone.data(), - std::min(configured_name.size(), configured_zone.size())); + (std::min)(configured_name.size(), configured_zone.size())); } ASSERT_MSG(time_zone_binary.IsValid(configured_name), "Invalid time zone {}!", diff --git a/src/core/hle/service/hid/hid_debug_server.cpp b/src/core/hle/service/hid/hid_debug_server.cpp index 738c6d9ae2..450c1e953f 100644 --- a/src/core/hle/service/hid/hid_debug_server.cpp +++ b/src/core/hle/service/hid/hid_debug_server.cpp @@ -178,7 +178,7 @@ Result IHidDebugServer::SetTouchScreenAutoPilotState( AutoPilotState auto_pilot{}; auto_pilot.count = - static_cast(std::min(auto_pilot_buffer.size(), auto_pilot.state.size())); + static_cast((std::min)(auto_pilot_buffer.size(), auto_pilot.state.size())); memcpy(auto_pilot.state.data(), auto_pilot_buffer.data(), auto_pilot.count * sizeof(TouchState)); diff --git a/src/core/hle/service/jit/jit_context.cpp b/src/core/hle/service/jit/jit_context.cpp index 0090e8568d..06a2368fe5 100644 --- a/src/core/hle/service/jit/jit_context.cpp +++ b/src/core/hle/service/jit/jit_context.cpp @@ -107,7 +107,7 @@ public: void AddTicks(u64 ticks) override {} u64 GetTicksRemaining() override { - return std::numeric_limits::max(); + return (std::numeric_limits::max)(); } u64 GetCNTPCT() override { return 0; diff --git a/src/core/hle/service/ldn/ldn_types.h b/src/core/hle/service/ldn/ldn_types.h index fa0cdcbfa7..56a3cd1b4b 100644 --- a/src/core/hle/service/ldn/ldn_types.h +++ b/src/core/hle/service/ldn/ldn_types.h @@ -170,7 +170,7 @@ struct Ssid { Ssid() = 
default; constexpr explicit Ssid(std::string_view data) { - length = static_cast(std::min(data.size(), SsidLengthMax)); + length = static_cast((std::min)(data.size(), SsidLengthMax)); raw = {}; data.copy(raw.data(), length); raw[length] = 0; diff --git a/src/core/hle/service/lm/lm.cpp b/src/core/hle/service/lm/lm.cpp index 20df002330..508f91546d 100644 --- a/src/core/hle/service/lm/lm.cpp +++ b/src/core/hle/service/lm/lm.cpp @@ -180,7 +180,7 @@ private: if (length == 0) { return std::nullopt; } - const auto length_to_read = std::min(length, data.size() - offset); + const auto length_to_read = (std::min)(length, data.size() - offset); std::string output(length_to_read, '\0'); std::memcpy(output.data(), data.data() + offset, length_to_read); diff --git a/src/core/hle/service/nfc/common/device.cpp b/src/core/hle/service/nfc/common/device.cpp index 30eab469a1..c026d47e86 100644 --- a/src/core/hle/service/nfc/common/device.cpp +++ b/src/core/hle/service/nfc/common/device.cpp @@ -978,7 +978,7 @@ Result NfcDevice::GetApplicationArea(std::span data) const { } memcpy(data.data(), tag_data.application_area.data(), - std::min(data.size(), sizeof(NFP::ApplicationArea))); + (std::min)(data.size(), sizeof(NFP::ApplicationArea))); return ResultSuccess; } diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 4710167364..15c7d8d2c7 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -613,7 +613,7 @@ void IGeneralService::EnumerateNetworkInterfaces(HLERequestContext& ctx) { const size_t guest_bytes = ctx.GetWriteBufferSize(); if (guest_bytes && !blob.empty()) - ctx.WriteBuffer(blob.data(), std::min(guest_bytes, blob.size())); + ctx.WriteBuffer(blob.data(), (std::min)(guest_bytes, blob.size())); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); @@ -639,7 +639,7 @@ void IGeneralService::EnumerateNetworkProfiles(HLERequestContext& ctx) { const size_t guest_sz = ctx.GetWriteBufferSize(); if (guest_sz && 
uuids.size()) { - const size_t to_copy = std::min(guest_sz, uuids.size() * sizeof(u128)); + const size_t to_copy = (std::min)(guest_sz, uuids.size() * sizeof(u128)); ctx.WriteBuffer(uuids.data(), to_copy); } diff --git a/src/core/hle/service/ns/application_manager_interface.cpp b/src/core/hle/service/ns/application_manager_interface.cpp index 517ec75743..60ecd5c2b9 100644 --- a/src/core/hle/service/ns/application_manager_interface.cpp +++ b/src/core/hle/service/ns/application_manager_interface.cpp @@ -410,7 +410,7 @@ Result IApplicationManagerInterface::IsAnyApplicationEntityInstalled( Result IApplicationManagerInterface::GetApplicationView( OutArray out_application_views, InArray application_ids) { - const auto size = std::min(out_application_views.size(), application_ids.size()); + const auto size = (std::min)(out_application_views.size(), application_ids.size()); LOG_WARNING(Service_NS, "(STUBBED) called, size={}", application_ids.size()); for (size_t i = 0; i < size; i++) { @@ -428,7 +428,7 @@ Result IApplicationManagerInterface::GetApplicationView( Result IApplicationManagerInterface::GetApplicationViewWithPromotionInfo( OutArray out_application_views, InArray application_ids) { - const auto size = std::min(out_application_views.size(), application_ids.size()); + const auto size = (std::min)(out_application_views.size(), application_ids.size()); LOG_WARNING(Service_NS, "(STUBBED) called, size={}", application_ids.size()); for (size_t i = 0; i < size; i++) { diff --git a/src/core/hle/service/ns/platform_service_manager.cpp b/src/core/hle/service/ns/platform_service_manager.cpp index 23cf05005c..301cf4ac4f 100644 --- a/src/core/hle/service/ns/platform_service_manager.cpp +++ b/src/core/hle/service/ns/platform_service_manager.cpp @@ -254,7 +254,7 @@ Result IPlatformServiceManager::GetSharedFontInOrderOfPriority( constexpr size_t MaxElementCount = 6; // TODO(ogniK): Have actual priority order - const auto max_size = std::min({MaxElementCount, 
out_font_codes.size(), out_font_offsets.size(), + const auto max_size = (std::min)({MaxElementCount, out_font_codes.size(), out_font_offsets.size(), out_font_sizes.size(), impl->shared_font_regions.size()}); for (size_t i = 0; i < max_size; i++) { diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 02913a5817..140c6eb6e3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -504,7 +504,7 @@ NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span(std::countr_zero(YUZU_PAGESIZE))}; static constexpr u32 SUPPORTED_BIG_PAGE_SIZES{0x30000}; static constexpr u32 DEFAULT_BIG_PAGE_SIZE{0x20000}; u32 big_page_size{DEFAULT_BIG_PAGE_SIZE}; - u32 big_page_size_bits{std::countr_zero(DEFAULT_BIG_PAGE_SIZE)}; + u32 big_page_size_bits{static_cast(std::countr_zero(DEFAULT_BIG_PAGE_SIZE))}; static constexpr u32 VA_START_SHIFT{10}; static constexpr u64 DEFAULT_VA_SPLIT{1ULL << 34}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 9ca6308e6f..fa46c2f280 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -139,7 +139,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span entries, DeviceFD fd) { - const size_t num_entries = std::min(params.num_entries, static_cast(entries.size())); + const size_t num_entries = (std::min)(params.num_entries, static_cast(entries.size())); for (size_t i = 0; i < num_entries; i++) { DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, true); entries[i].map_address = static_cast(pin_address); @@ -150,7 +150,7 @@ NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span entries) { - const size_t num_entries 
= std::min(params.num_entries, static_cast(entries.size())); + const size_t num_entries = (std::min)(params.num_entries, static_cast(entries.size())); for (size_t i = 0; i < num_entries; i++) { nvmap.UnpinHandle(entries[i].map_handle); entries[i] = {}; diff --git a/src/core/hle/service/nvnflinger/buffer_queue_consumer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_consumer.cpp index 91ba35aef5..a9b0f9d2f3 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_consumer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_consumer.cpp @@ -328,7 +328,7 @@ void BufferQueueConsumer::Transact(u32 code, std::span parcel_data, const auto serialized = parcel_out.Serialize(); std::memcpy(parcel_reply.data(), serialized.data(), - std::min(parcel_reply.size(), serialized.size())); + (std::min)(parcel_reply.size(), serialized.size())); } Kernel::KReadableEvent* BufferQueueConsumer::GetNativeHandle(u32 type_id) { diff --git a/src/core/hle/service/nvnflinger/buffer_queue_core.cpp b/src/core/hle/service/nvnflinger/buffer_queue_core.cpp index 30095b0f73..27ac930f96 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_core.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_core.cpp @@ -47,7 +47,7 @@ s32 BufferQueueCore::GetMinMaxBufferCountLocked(bool async) const { s32 BufferQueueCore::GetMaxBufferCountLocked(bool async) const { const auto min_buffer_count = GetMinMaxBufferCountLocked(async); - auto max_buffer_count = std::max(default_max_buffer_count, min_buffer_count); + auto max_buffer_count = (std::max)(default_max_buffer_count, min_buffer_count); if (override_max_buffer_count != 0) { ASSERT(override_max_buffer_count >= min_buffer_count); diff --git a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp index 1bb88a45fa..f9e1dba965 100644 --- a/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp +++ b/src/core/hle/service/nvnflinger/buffer_queue_producer.cpp @@ -942,7 +942,7 @@ void 
BufferQueueProducer::Transact(u32 code, std::span parcel_data, std::scoped_lock lock{core->mutex}; - auto buffer_history_count = std::min(parcel_in.Read(), (s32)core->history.size()); + auto buffer_history_count = (std::min)(parcel_in.Read(), (s32)core->history.size()); if (buffer_history_count <= 0) { parcel_out.Write(Status::BadValue); @@ -978,7 +978,7 @@ void BufferQueueProducer::Transact(u32 code, std::span parcel_data, const auto serialized = parcel_out.Serialize(); std::memcpy(parcel_reply.data(), serialized.data(), - std::min(parcel_reply.size(), serialized.size())); + (std::min)(parcel_reply.size(), serialized.size())); } diff --git a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp index 77622a7832..7098f4709d 100644 --- a/src/core/hle/service/nvnflinger/hardware_composer.cpp +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -101,7 +101,7 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, Display& display, // only swap intervals of 0, 1 and 2 have been observed, but if 3 were // to be introduced, this would cause an issue. 
if (swap_interval) { - swap_interval = std::min(*swap_interval, item_swap_interval); + swap_interval = (std::min)(*swap_interval, item_swap_interval); } else { swap_interval = item_swap_interval; } diff --git a/src/core/hle/service/psc/time/common.h b/src/core/hle/service/psc/time/common.h index 954aed666a..0ad2ed51f8 100644 --- a/src/core/hle/service/psc/time/common.h +++ b/src/core/hle/service/psc/time/common.h @@ -138,12 +138,12 @@ constexpr inline std::chrono::nanoseconds ConvertToTimeSpan(s64 ticks) { std::chrono::duration_cast(std::chrono::seconds(1)).count()}; constexpr s64 max{Common::WallClock::CNTFRQ * - (std::numeric_limits::max() / one_second_ns)}; + ((std::numeric_limits::max)() / one_second_ns)}; if (ticks > max) { - return std::chrono::nanoseconds(std::numeric_limits::max()); + return std::chrono::nanoseconds((std::numeric_limits::max)()); } else if (ticks < -max) { - return std::chrono::nanoseconds(std::numeric_limits::min()); + return std::chrono::nanoseconds((std::numeric_limits::min)()); } auto a{ticks / Common::WallClock::CNTFRQ * one_second_ns}; @@ -156,9 +156,9 @@ constexpr inline Result GetSpanBetweenTimePoints(s64* out_seconds, const SteadyC const SteadyClockTimePoint& b) { R_UNLESS(out_seconds, ResultInvalidArgument); R_UNLESS(a.IdMatches(b), ResultInvalidArgument); - R_UNLESS(a.time_point >= 0 || b.time_point <= a.time_point + std::numeric_limits::max(), + R_UNLESS(a.time_point >= 0 || b.time_point <= a.time_point + (std::numeric_limits::max)(), ResultOverflow); - R_UNLESS(a.time_point < 0 || b.time_point >= a.time_point + std::numeric_limits::min(), + R_UNLESS(a.time_point < 0 || b.time_point >= a.time_point + (std::numeric_limits::min)(), ResultOverflow); *out_seconds = b.time_point - a.time_point; diff --git a/src/core/hle/service/psc/time/power_state_request_manager.cpp b/src/core/hle/service/psc/time/power_state_request_manager.cpp index 17de0bf4dd..15fe8e2918 100644 --- a/src/core/hle/service/psc/time/power_state_request_manager.cpp 
+++ b/src/core/hle/service/psc/time/power_state_request_manager.cpp @@ -17,7 +17,7 @@ PowerStateRequestManager::~PowerStateRequestManager() { void PowerStateRequestManager::UpdatePendingPowerStateRequestPriority(u32 priority) { std::scoped_lock l{m_mutex}; if (m_has_pending_request) { - m_pending_request_priority = std::max(m_pending_request_priority, priority); + m_pending_request_priority = (std::max)(m_pending_request_priority, priority); } else { m_pending_request_priority = priority; m_has_pending_request = true; diff --git a/src/core/hle/service/set/settings_server.cpp b/src/core/hle/service/set/settings_server.cpp index aa873bc8c5..7d1869a4e8 100644 --- a/src/core/hle/service/set/settings_server.cpp +++ b/src/core/hle/service/set/settings_server.cpp @@ -122,8 +122,8 @@ Result ISettingsServer::GetAvailableLanguageCodes( Out out_count, OutArray out_language_codes) { LOG_DEBUG(Service_SET, "called"); - const std::size_t max_amount = std::min(PRE_4_0_0_MAX_ENTRIES, out_language_codes.size()); - *out_count = static_cast(std::min(available_language_codes.size(), max_amount)); + const std::size_t max_amount = (std::min)(PRE_4_0_0_MAX_ENTRIES, out_language_codes.size()); + *out_count = static_cast((std::min)(available_language_codes.size(), max_amount)); memcpy(out_language_codes.data(), available_language_codes.data(), static_cast(*out_count) * sizeof(LanguageCode)); @@ -159,8 +159,8 @@ Result ISettingsServer::GetAvailableLanguageCodes2( Out out_count, OutArray language_codes) { LOG_DEBUG(Service_SET, "called"); - const std::size_t max_amount = std::min(POST_4_0_0_MAX_ENTRIES, language_codes.size()); - *out_count = static_cast(std::min(available_language_codes.size(), max_amount)); + const std::size_t max_amount = (std::min)(POST_4_0_0_MAX_ENTRIES, language_codes.size()); + *out_count = static_cast((std::min)(available_language_codes.size(), max_amount)); memcpy(language_codes.data(), available_language_codes.data(), static_cast(*out_count) * sizeof(LanguageCode)); 
@@ -233,7 +233,7 @@ Result ISettingsServer::GetDeviceNickName( LOG_DEBUG(Service_SET, "called"); const std::size_t string_size = - std::min(Settings::values.device_name.GetValue().size(), out_device_name->size()); + (std::min)(Settings::values.device_name.GetValue().size(), out_device_name->size()); *out_device_name = {}; memcpy(out_device_name->data(), Settings::values.device_name.GetValue().data(), string_size); diff --git a/src/core/hle/service/set/system_settings_server.cpp b/src/core/hle/service/set/system_settings_server.cpp index d246b95d0e..c70fdea24b 100644 --- a/src/core/hle/service/set/system_settings_server.cpp +++ b/src/core/hle/service/set/system_settings_server.cpp @@ -533,7 +533,7 @@ Result ISystemSettingsServer::GetEulaVersions( LOG_INFO(Service_SET, "called, elements={}", m_system_settings.eula_version_count); *out_count = - std::min(m_system_settings.eula_version_count, static_cast(out_eula_versions.size())); + (std::min)(m_system_settings.eula_version_count, static_cast(out_eula_versions.size())); memcpy(out_eula_versions.data(), m_system_settings.eula_versions.data(), static_cast(*out_count) * sizeof(EulaVersion)); R_SUCCEED(); @@ -599,7 +599,7 @@ Result ISystemSettingsServer::GetAccountNotificationSettings( LOG_INFO(Service_SET, "called, elements={}", m_system_settings.account_notification_settings_count); - *out_count = std::min(m_system_settings.account_notification_settings_count, + *out_count = (std::min)(m_system_settings.account_notification_settings_count, static_cast(out_account_notification_settings.size())); memcpy(out_account_notification_settings.data(), m_system_settings.account_notification_settings.data(), diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp index 9e25eae4d4..3b63d162c4 100644 --- a/src/core/hle/service/sm/sm_controller.cpp +++ b/src/core/hle/service/sm/sm_controller.cpp @@ -74,9 +74,9 @@ void Controller::QueryPointerBufferSize(HLERequestContext& ctx) { 
ASSERT(process != nullptr); u32 buffer_size = process->GetPointerBufferSize(); - if (buffer_size > std::numeric_limits::max()) { + if (buffer_size > (std::numeric_limits::max)()) { LOG_WARNING(Service, "Pointer buffer size exceeds u16 max, clamping"); - buffer_size = std::numeric_limits::max(); + buffer_size = (std::numeric_limits::max)(); } IPC::ResponseBuilder rb{ctx, 3}; @@ -94,9 +94,9 @@ void Controller::SetPointerBufferSize(HLERequestContext& ctx) { u32 requested_size = rp.PopRaw(); - if (requested_size > std::numeric_limits::max()) { + if (requested_size > (std::numeric_limits::max)()) { LOG_WARNING(Service, "Requested pointer buffer size too large, clamping to 0xFFFF"); - requested_size = std::numeric_limits::max(); + requested_size = (std::numeric_limits::max)(); } process->SetPointerBufferSize(requested_size); diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp index fffbc413bb..a31bf45238 100644 --- a/src/core/hle/service/sockets/bsd.cpp +++ b/src/core/hle/service/sockets/bsd.cpp @@ -45,13 +45,13 @@ bool IsConnectionBased(Type type) { template T GetValue(std::span buffer) { T t{}; - std::memcpy(&t, buffer.data(), std::min(sizeof(T), buffer.size())); + std::memcpy(&t, buffer.data(), (std::min)(sizeof(T), buffer.size())); return t; } template void PutValue(std::span buffer, const T& t) { - std::memcpy(buffer.data(), &t, std::min(sizeof(T), buffer.size())); + std::memcpy(buffer.data(), &t, (std::min)(sizeof(T), buffer.size())); } } // Anonymous namespace diff --git a/src/core/hle/service/spl/spl_module.cpp b/src/core/hle/service/spl/spl_module.cpp index 549e6f4fa8..f59eeac06c 100644 --- a/src/core/hle/service/spl/spl_module.cpp +++ b/src/core/hle/service/spl/spl_module.cpp @@ -68,7 +68,7 @@ void Module::Interface::GenerateRandomBytes(HLERequestContext& ctx) { const std::size_t size = ctx.GetWriteBufferSize(); - std::uniform_int_distribution distribution(0, std::numeric_limits::max()); + std::uniform_int_distribution 
distribution(0, (std::numeric_limits::max)()); std::vector data(size); std::generate(data.begin(), data.end(), [&] { return static_cast(distribution(rng)); }); diff --git a/src/core/hle/service/ssl/ssl.cpp b/src/core/hle/service/ssl/ssl.cpp index 2d10bd04d2..7720c93d5a 100644 --- a/src/core/hle/service/ssl/ssl.cpp +++ b/src/core/hle/service/ssl/ssl.cpp @@ -445,7 +445,7 @@ private: void GetNextAlpnProto(HLERequestContext& ctx) { const size_t writable = ctx.GetWriteBufferSize(); - const size_t to_write = std::min(next_alpn_proto.size(), writable); + const size_t to_write = (std::min)(next_alpn_proto.size(), writable); if (to_write != 0) { ctx.WriteBuffer(std::span(next_alpn_proto.data(), to_write)); diff --git a/src/core/hle/service/vi/application_display_service.cpp b/src/core/hle/service/vi/application_display_service.cpp index 289ad7073c..cd28b81dc1 100644 --- a/src/core/hle/service/vi/application_display_service.cpp +++ b/src/core/hle/service/vi/application_display_service.cpp @@ -192,7 +192,7 @@ Result IApplicationDisplayService::OpenLayer(Out out_size, const auto buffer = parcel.Serialize(); std::memcpy(out_native_window.data(), buffer.data(), - std::min(out_native_window.size(), buffer.size())); + (std::min)(out_native_window.size(), buffer.size())); *out_size = buffer.size(); R_SUCCEED(); @@ -226,7 +226,7 @@ Result IApplicationDisplayService::CreateStrayLayer( const auto buffer = parcel.Serialize(); std::memcpy(out_native_window.data(), buffer.data(), - std::min(out_native_window.size(), buffer.size())); + (std::min)(out_native_window.size(), buffer.size())); *out_size = buffer.size(); diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp index 6a3c6e9c41..400bd04bdf 100644 --- a/src/core/internal_network/network.cpp +++ b/src/core/internal_network/network.cpp @@ -105,7 +105,7 @@ sockaddr TranslateFromSockAddrIn(SockAddrIn input) { } LINGER MakeLinger(bool enable, u32 linger_value) { - ASSERT(linger_value <= 
std::numeric_limits::max()); + ASSERT(linger_value <= (std::numeric_limits::max)()); LINGER value; value.l_onoff = enable ? 1 : 0; @@ -798,7 +798,7 @@ Errno Socket::Shutdown(ShutdownHow how) { std::pair Socket::Recv(int flags, std::span message) { ASSERT(flags == 0); - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); const auto result = recv(fd, reinterpret_cast(message.data()), static_cast(message.size()), 0); @@ -811,7 +811,7 @@ std::pair Socket::Recv(int flags, std::span message) { std::pair Socket::RecvFrom(int flags, std::span message, SockAddrIn* addr) { ASSERT(flags == 0); - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); sockaddr_in addr_in{}; socklen_t addrlen = sizeof(addr_in); @@ -831,7 +831,7 @@ std::pair Socket::RecvFrom(int flags, std::span message, SockAdd } std::pair Socket::Send(std::span message, int flags) { - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); ASSERT(flags == 0); int native_flags = 0; diff --git a/src/core/internal_network/network_interface.cpp b/src/core/internal_network/network_interface.cpp index f62381b9e3..ae9755113a 100644 --- a/src/core/internal_network/network_interface.cpp +++ b/src/core/internal_network/network_interface.cpp @@ -147,7 +147,7 @@ std::vector GetAvailableNetworkInterfaces() { } // ignore header - file.ignore(std::numeric_limits::max(), '\n'); + file.ignore((std::numeric_limits::max)(), '\n'); bool gateway_found = false; diff --git a/src/core/internal_network/socket_proxy.cpp b/src/core/internal_network/socket_proxy.cpp index c263fb4ca8..1600c061f0 100644 --- a/src/core/internal_network/socket_proxy.cpp +++ b/src/core/internal_network/socket_proxy.cpp @@ -105,14 +105,14 @@ Errno ProxySocket::Shutdown(ShutdownHow how) { std::pair ProxySocket::Recv(int 
flags, std::span message) { LOG_WARNING(Network, "(STUBBED) called"); ASSERT(flags == 0); - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); return {static_cast(0), Errno::SUCCESS}; } std::pair ProxySocket::RecvFrom(int flags, std::span message, SockAddrIn* addr) { ASSERT(flags == 0); - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); // TODO (flTobi): Verify the timeout behavior and break when connection is lost const auto timestamp = std::chrono::steady_clock::now(); @@ -183,7 +183,7 @@ std::pair ProxySocket::ReceivePacket(int flags, std::span messag std::pair ProxySocket::Send(std::span message, int flags) { LOG_WARNING(Network, "(STUBBED) called"); - ASSERT(message.size() < static_cast(std::numeric_limits::max())); + ASSERT(message.size() < static_cast((std::numeric_limits::max)())); ASSERT(flags == 0); return {static_cast(0), Errno::SUCCESS}; diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index 4a87ab53e7..9a82dae144 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp @@ -164,7 +164,7 @@ ResultStatus AppLoader_NCA::VerifyIntegrity(std::function // Begin iterating the file. while (processed_size < total_size) { // Refill the buffer. - const size_t intended_read_size = std::min(buffer.size(), total_size - processed_size); + const size_t intended_read_size = (std::min)(buffer.size(), total_size - processed_size); const size_t read_size = file->Read(buffer.data(), intended_read_size, processed_size); // Update the hash function with the buffer contents. 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 0035c626e2..2583aae867 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -48,7 +48,7 @@ struct Memory::Impl { explicit Impl(Core::System& system_) : system{system_} { // Initialize thread count based on available cores for parallel memory operations const unsigned int hw_concurrency = std::thread::hardware_concurrency(); - thread_count = std::max(2u, std::min(hw_concurrency, 8u)); // Limit to 8 threads max + thread_count = (std::max)(2u, (std::min)(hw_concurrency, 8u)); // Limit to 8 threads max } void SetCurrentPageTable(Kernel::KProcess& process) { @@ -263,7 +263,7 @@ struct Memory::Impl { while (remaining_size) { const std::size_t copy_amount = - std::min(static_cast(YUZU_PAGESIZE) - page_offset, remaining_size); + (std::min)(static_cast(YUZU_PAGESIZE) - page_offset, remaining_size); const auto current_vaddr = static_cast((page_index << YUZU_PAGEBITS) + page_offset); @@ -948,7 +948,7 @@ struct Memory::Impl { const auto* p = GetPointerImpl( v_address, []() {}, []() {}); constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; - const size_t core = std::min(system.GetCurrentHostThreadID(), + const size_t core = (std::min)(system.GetCurrentHostThreadID(), sys_core); // any other calls threads go to syscore. if (!gpu_device_memory) [[unlikely]] { gpu_device_memory = &system.Host1x().MemoryManager(); @@ -989,7 +989,7 @@ struct Memory::Impl { void InvalidateGPUMemory(u8* p, size_t size) { constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; - const size_t core = std::min(system.GetCurrentHostThreadID(), + const size_t core = (std::min)(system.GetCurrentHostThreadID(), sys_core); // any other calls threads go to syscore. 
if (!gpu_device_memory) [[unlikely]] { gpu_device_memory = &system.Host1x().MemoryManager(); diff --git a/src/core/tools/renderdoc.cpp b/src/core/tools/renderdoc.cpp index 947fa6cb37..d3a47e1d96 100644 --- a/src/core/tools/renderdoc.cpp +++ b/src/core/tools/renderdoc.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -18,10 +21,12 @@ namespace Tools { RenderdocAPI::RenderdocAPI() { #ifdef WIN32 if (HMODULE mod = GetModuleHandleA("renderdoc.dll")) { - const auto RENDERDOC_GetAPI = - reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); - const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); - ASSERT(ret == 1); + void* proc = reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); + if (proc) { + const auto RENDERDOC_GetAPI = reinterpret_cast(proc); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdoc_api); + ASSERT(ret == 1); + } } #else #ifdef ANDROID diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 0065b1cf7f..38457deb50 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -103,7 +103,7 @@ if (MSVC) /WX) endif() - if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang") + if (CXX_CLANG) list(APPEND DYNARMIC_CXX_FLAGS -Qunused-arguments -Wno-missing-braces) @@ -131,7 +131,7 @@ else() -Wfatal-errors) endif() - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if (CXX_GCC) # GCC produces bogus -Warray-bounds warnings from xbyak headers for code paths that are not # actually reachable. 
Specifically, it happens in cases where some code casts an Operand& # to Address& after first checking isMEM(), and that code is inlined in a situation where @@ -141,7 +141,7 @@ else() list(APPEND DYNARMIC_CXX_FLAGS -Wstack-usage=4096) endif() - if (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang") + if (CXX_CLANG) # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. # And this in turns limits the size of a std::array. list(APPEND DYNARMIC_CXX_FLAGS -fbracket-depth=1024) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index a13baa6a97..3bc93e6fd5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -277,7 +277,7 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) { } void EmitX64::EmitAddCycles(size_t cycles) { - ASSERT(cycles < std::numeric_limits::max()); + ASSERT(cycles < (std::numeric_limits::max)()); code.sub(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], static_cast(cycles)); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index e795181872..31231c02aa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -38,7 +38,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size); Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size); - constexpr u64 int_max = static_cast(std::numeric_limits>::max()); + constexpr u64 int_max = static_cast((std::numeric_limits>::max)()); if constexpr (size < 64) { code.xor_(overflow.cvt32(), overflow.cvt32()); code.bt(result.cvt32(), size - 1); @@ -82,7 +82,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* 
inst Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size); - constexpr u64 boundary = op == Op::Add ? std::numeric_limits>::max() : 0; + constexpr u64 boundary = op == Op::Add ? (std::numeric_limits>::max)() : 0; if constexpr (op == Op::Add) { code.add(op_result, addend); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index e1b9e54df8..99000c2a57 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -548,7 +548,7 @@ void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const u8 shift_amount = std::min(args[1].GetImmediateU8(), u8(63)); + const u8 shift_amount = (std::min)(args[1].GetImmediateU8(), u8(63)); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpsraq(result, result, shift_amount); @@ -2139,7 +2139,7 @@ void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); }); } @@ -2201,7 +2201,7 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::max(x, y); }); + std::transform(a.begin(), a.end(), b.begin(), 
result.begin(), [](auto x, auto y) { return (std::max)(x, y); }); }); } @@ -2259,7 +2259,7 @@ void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); }); } @@ -2321,7 +2321,7 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) { } EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { - std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return std::min(x, y); }); + std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); }); }); } @@ -2837,22 +2837,22 @@ static void LowerPairedOperation(VectorArray& result, const VectorArray& x template static void PairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { - PairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); + PairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); } template static void PairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { - PairedOperation(result, x, y, [](auto a, auto b) { return std::min(a, b); }); + PairedOperation(result, x, y, [](auto a, auto b) { return (std::min)(a, b); }); } template static void LowerPairedMax(VectorArray& result, const VectorArray& x, const VectorArray& y) { - LowerPairedOperation(result, x, y, [](auto a, auto b) { return std::max(a, b); }); + LowerPairedOperation(result, x, y, [](auto a, auto b) { return (std::max)(a, b); }); } template static void LowerPairedMin(VectorArray& result, const VectorArray& x, const VectorArray& y) { - LowerPairedOperation(result, x, y, [](auto a, auto b) 
{ return std::min(a, b); }); + LowerPairedOperation(result, x, y, [](auto a, auto b) { return (std::min)(a, b); }); } template @@ -4933,7 +4933,7 @@ static bool VectorSignedSaturatedShiftLeft(VectorArray& dst, const VectorArra for (size_t i = 0; i < dst.size(); i++) { const T element = data[i]; const T shift = std::clamp(static_cast(mcl::bit::sign_extend<8>(static_cast(shift_values[i] & 0xFF))), - -static_cast(bit_size_minus_one), std::numeric_limits::max()); + -static_cast(bit_size_minus_one), (std::numeric_limits::max)()); if (element == 0) { dst[i] = 0; @@ -4995,7 +4995,7 @@ static bool VectorSignedSaturatedShiftLeftUnsigned(VectorArray& dst, const Ve const U shifted_test = shifted >> static_cast(shift); if (shifted_test != static_cast(element)) { - dst[i] = static_cast(std::numeric_limits::max()); + dst[i] = static_cast((std::numeric_limits::max)()); qc_flag = true; } else { dst[i] = shifted; @@ -5845,11 +5845,11 @@ static bool EmitVectorUnsignedSaturatedAccumulateSigned(VectorArray& result, const s64 y = static_cast(static_cast>(rhs[i])); const s64 sum = x + y; - if (sum > std::numeric_limits::max()) { - result[i] = std::numeric_limits::max(); + if (sum > (std::numeric_limits::max)()) { + result[i] = (std::numeric_limits::max)(); qc_flag = true; } else if (sum < 0) { - result[i] = std::numeric_limits::min(); + result[i] = (std::numeric_limits::min)(); qc_flag = true; } else { result[i] = static_cast(sum); @@ -5947,20 +5947,20 @@ static bool VectorUnsignedSaturatedShiftLeft(VectorArray& dst, const VectorAr for (size_t i = 0; i < dst.size(); i++) { const T element = data[i]; const S shift = std::clamp(static_cast(mcl::bit::sign_extend<8>(static_cast(shift_values[i] & 0xFF))), - negative_bit_size, std::numeric_limits::max()); + negative_bit_size, (std::numeric_limits::max)()); if (element == 0 || shift <= negative_bit_size) { dst[i] = 0; } else if (shift < 0) { dst[i] = static_cast(element >> -shift); } else if (shift >= static_cast(bit_size)) { - dst[i] = 
std::numeric_limits::max(); + dst[i] = (std::numeric_limits::max)(); qc_flag = true; } else { const T shifted = element << shift; if ((shifted >> shift) != element) { - dst[i] = std::numeric_limits::max(); + dst[i] = (std::numeric_limits::max)(); qc_flag = true; } else { dst[i] = shifted; diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index c8f0d9575c..a368e6703f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -2116,7 +2116,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } } else { using FPT = mcl::unsigned_integer_of_size; // WORKAROUND: For issue 678 on MSVC - constexpr u64 integer_max = static_cast(std::numeric_limits>>::max()); + constexpr u64 integer_max = static_cast((std::numeric_limits>>::max)()); code.movaps(xmm0, GetVectorOf(code)); FCODE(cmplep)(xmm0, src); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index fa6006ed2a..29eab7908b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -85,7 +85,7 @@ void HostLocInfo::ReleaseOne() noexcept { if (current_references == 0) return; - ASSERT(size_t(accumulated_uses) + 1 < std::numeric_limits::max()); + ASSERT(size_t(accumulated_uses) + 1 < (std::numeric_limits::max)()); accumulated_uses++; current_references--; @@ -116,7 +116,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.clear(); } values.push_back(inst); - ASSERT(size_t(total_uses) + inst->UseCount() < std::numeric_limits::max()); + ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); } @@ -400,14 +400,14 
@@ void RegAlloc::HostCall(IR::Inst* result_def, } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { - ASSERT(stack_space < size_t(std::numeric_limits::max())); + ASSERT(stack_space < size_t((std::numeric_limits::max)())); ASSERT(reserved_stack_space == 0); reserved_stack_space = stack_space; code->sub(code->rsp, u32(stack_space)); } void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept { - ASSERT(stack_space < size_t(std::numeric_limits::max())); + ASSERT(stack_space < size_t((std::numeric_limits::max)())); ASSERT(reserved_stack_space == stack_space); reserved_stack_space = 0; code->add(code->rsp, u32(stack_space)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index f70329f471..bfacdcca52 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -52,18 +52,18 @@ public: is_set_last_use = true; } inline void ReadLock() noexcept { - ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits::max()); + ASSERT(size_t(is_being_used_count) + 1 < (std::numeric_limits::max)()); ASSERT(!is_scratch); is_being_used_count++; } inline void WriteLock() noexcept { - ASSERT(size_t(is_being_used_count) + 1 < std::numeric_limits::max()); + ASSERT(size_t(is_being_used_count) + 1 < (std::numeric_limits::max)()); ASSERT(is_being_used_count == 0); is_being_used_count++; is_scratch = true; } inline void AddArgReference() noexcept { - ASSERT(size_t(current_references) + 1 < std::numeric_limits::max()); + ASSERT(size_t(current_references) + 1 < (std::numeric_limits::max)()); current_references++; ASSERT(accumulated_uses + current_references <= total_uses); } diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index fdadef8257..3f4501a528 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ 
-106,6 +106,7 @@ inline size_t RegNumber(ExtReg reg) { } ASSERT_MSG(false, "Invalid extended register"); + return 0; } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/hid_core/frontend/emulated_controller.cpp b/src/hid_core/frontend/emulated_controller.cpp index 5bdad4a0ad..8a6922c49f 100644 --- a/src/hid_core/frontend/emulated_controller.cpp +++ b/src/hid_core/frontend/emulated_controller.cpp @@ -1308,9 +1308,9 @@ bool EmulatedController::SetVibration(DeviceIndex device_index, const VibrationV : Common::Input::VibrationAmplificationType::Linear; const Common::Input::VibrationStatus status = { - .low_amplitude = std::min(vibration.low_amplitude * strength, 1.0f), + .low_amplitude = (std::min)(vibration.low_amplitude * strength, 1.0f), .low_frequency = vibration.low_frequency, - .high_amplitude = std::min(vibration.high_amplitude * strength, 1.0f), + .high_amplitude = (std::min)(vibration.high_amplitude * strength, 1.0f), .high_frequency = vibration.high_frequency, .type = type, }; diff --git a/src/hid_core/hidbus/ringcon.cpp b/src/hid_core/hidbus/ringcon.cpp index 4f5eaa5053..a2bfd82636 100644 --- a/src/hid_core/hidbus/ringcon.cpp +++ b/src/hid_core/hidbus/ringcon.cpp @@ -283,7 +283,7 @@ u8 RingController::GetCrcValue(const std::vector& data) const { template u64 RingController::GetData(const T& reply, std::span out_data) const { static_assert(std::is_trivially_copyable_v); - const auto data_size = static_cast(std::min(sizeof(reply), out_data.size())); + const auto data_size = static_cast((std::min)(sizeof(reply), out_data.size())); std::memcpy(out_data.data(), &reply, data_size); return data_size; } diff --git a/src/hid_core/irsensor/image_transfer_processor.cpp b/src/hid_core/irsensor/image_transfer_processor.cpp index 2b5a50ef6f..9040390946 100644 --- a/src/hid_core/irsensor/image_transfer_processor.cpp +++ b/src/hid_core/irsensor/image_transfer_processor.cpp @@ -146,7 +146,7 @@ void 
ImageTransferProcessor::SetTransferMemoryAddress(Common::ProcessAddress t_m Core::IrSensor::ImageTransferProcessorState ImageTransferProcessor::GetState( std::span data) const { - const auto size = std::min(GetDataSize(current_config.trimming_format), data.size()); + const auto size = (std::min)(GetDataSize(current_config.trimming_format), data.size()); system.ApplicationMemory().ReadBlock(transfer_memory, data.data(), size); return processor_state; } diff --git a/src/hid_core/resources/abstracted_pad/abstract_battery_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_battery_handler.cpp index 62fbbb0a7e..b3e17b389d 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_battery_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_battery_handler.cpp @@ -30,7 +30,7 @@ void NpadAbstractBatteryHandler::SetPropertiesHandler(NpadAbstractPropertiesHand } Result NpadAbstractBatteryHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_button_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_button_handler.cpp index 5871694335..e4166b3735 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_button_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_button_handler.cpp @@ -30,7 +30,7 @@ void NpadAbstractButtonHandler::SetPropertiesHandler(NpadAbstractPropertiesHandl } Result NpadAbstractButtonHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_ir_sensor_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_ir_sensor_handler.cpp index e399edfd70..4367dcaa56 100644 --- 
a/src/hid_core/resources/abstracted_pad/abstract_ir_sensor_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_ir_sensor_handler.cpp @@ -24,7 +24,7 @@ void NpadAbstractIrSensorHandler::SetPropertiesHandler(NpadAbstractPropertiesHan } Result NpadAbstractIrSensorHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_led_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_led_handler.cpp index 0b2bfe88da..b4375e57f3 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_led_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_led_handler.cpp @@ -29,7 +29,7 @@ void NpadAbstractLedHandler::SetPropertiesHandler(NpadAbstractPropertiesHandler* } Result NpadAbstractLedHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_mcu_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_mcu_handler.cpp index 6f35bd95cc..accbfe0def 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_mcu_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_mcu_handler.cpp @@ -22,7 +22,7 @@ void NpadAbstractMcuHandler::SetPropertiesHandler(NpadAbstractPropertiesHandler* } Result NpadAbstractMcuHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_nfc_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_nfc_handler.cpp index bd9b79333c..7a47786d42 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_nfc_handler.cpp +++ 
b/src/hid_core/resources/abstracted_pad/abstract_nfc_handler.cpp @@ -24,7 +24,7 @@ void NpadAbstractNfcHandler::SetPropertiesHandler(NpadAbstractPropertiesHandler* } Result NpadAbstractNfcHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_pad.cpp b/src/hid_core/resources/abstracted_pad/abstract_pad.cpp index 435b095f02..39906fe33f 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_pad.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_pad.cpp @@ -68,7 +68,7 @@ void AbstractPad::SetNpadId(Core::HID::NpadIdType npad_id) { } Result AbstractPad::Activate() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } diff --git a/src/hid_core/resources/abstracted_pad/abstract_pad_holder.cpp b/src/hid_core/resources/abstracted_pad/abstract_pad_holder.cpp index 8334dc34f6..80f86459b9 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_pad_holder.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_pad_holder.cpp @@ -73,7 +73,7 @@ u64 NpadAbstractedPadHolder::RemoveAbstractPadByAssignmentStyle( } u32 NpadAbstractedPadHolder::GetAbstractedPads(std::span list) const { - u32 num_elements = std::min(static_cast(list.size()), list_size); + u32 num_elements = (std::min)(static_cast(list.size()), list_size); for (std::size_t i = 0; i < num_elements; i++) { list[i] = assignment_list[i].abstracted_pad; } diff --git a/src/hid_core/resources/abstracted_pad/abstract_palma_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_palma_handler.cpp index 04d276d617..c10d0c4070 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_palma_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_palma_handler.cpp @@ -25,7 +25,7 @@ void 
NpadAbstractPalmaHandler::SetPalmaResource(PalmaResource* resource) { } Result NpadAbstractPalmaHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_properties_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_properties_handler.cpp index 36b630c7f4..90c46cbe8c 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_properties_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_properties_handler.cpp @@ -38,7 +38,7 @@ Core::HID::NpadIdType NpadAbstractPropertiesHandler::GetNpadId() const { } Result NpadAbstractPropertiesHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } diff --git a/src/hid_core/resources/abstracted_pad/abstract_sixaxis_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_sixaxis_handler.cpp index 0dde244ef8..10c00ef95c 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_sixaxis_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_sixaxis_handler.cpp @@ -33,7 +33,7 @@ void NpadAbstractSixAxisHandler::SetSixaxisResource(SixAxisResource* resource) { } Result NpadAbstractSixAxisHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/abstracted_pad/abstract_vibration_handler.cpp b/src/hid_core/resources/abstracted_pad/abstract_vibration_handler.cpp index ca64b0a437..07a35b2147 100644 --- a/src/hid_core/resources/abstracted_pad/abstract_vibration_handler.cpp +++ b/src/hid_core/resources/abstracted_pad/abstract_vibration_handler.cpp @@ -55,7 +55,7 @@ void 
NpadAbstractVibrationHandler::SetGcVibration(NpadGcVibrationDevice* gc_devi } Result NpadAbstractVibrationHandler::IncrementRefCounter() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadHandlerOverflow; } ref_counter++; diff --git a/src/hid_core/resources/applet_resource.cpp b/src/hid_core/resources/applet_resource.cpp index 243beb1c7f..a533ca4319 100644 --- a/src/hid_core/resources/applet_resource.cpp +++ b/src/hid_core/resources/applet_resource.cpp @@ -271,7 +271,7 @@ void AppletResource::EnablePalmaBoostMode(u64 aruid, bool is_enabled) { } Result AppletResource::RegisterCoreAppletResource() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultAppletResourceOverflow; } if (ref_counter == 0) { diff --git a/src/hid_core/resources/npad/npad.cpp b/src/hid_core/resources/npad/npad.cpp index ca1ccd659c..f1f5ee5e9f 100644 --- a/src/hid_core/resources/npad/npad.cpp +++ b/src/hid_core/resources/npad/npad.cpp @@ -55,7 +55,7 @@ NPad::~NPad() { } Result NPad::Activate() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultNpadResourceOverflow; } diff --git a/src/hid_core/resources/npad/npad_data.cpp b/src/hid_core/resources/npad/npad_data.cpp index 29ad5cb08c..fbcc3ef89a 100644 --- a/src/hid_core/resources/npad/npad_data.cpp +++ b/src/hid_core/resources/npad/npad_data.cpp @@ -46,7 +46,7 @@ Result NPadData::SetSupportedNpadIdType(std::span l } std::size_t NPadData::GetSupportedNpadIdType(std::span out_list) const { - std::size_t out_size = std::min(supported_npad_id_types_count, out_list.size()); + std::size_t out_size = (std::min)(supported_npad_id_types_count, out_list.size()); memcpy(out_list.data(), supported_npad_id_types.data(), out_size * sizeof(Core::HID::NpadIdType)); diff --git a/src/hid_core/resources/npad/npad_resource.cpp 
b/src/hid_core/resources/npad/npad_resource.cpp index 79f7d74c0c..21a514dce6 100644 --- a/src/hid_core/resources/npad/npad_resource.cpp +++ b/src/hid_core/resources/npad/npad_resource.cpp @@ -126,7 +126,7 @@ Result NPadResource::Activate(u64 aruid) { } Result NPadResource::Activate() { - if (ref_counter == std::numeric_limits::max() - 1) { + if (ref_counter == (std::numeric_limits::max)() - 1) { return ResultAppletResourceOverflow; } if (ref_counter == 0) { diff --git a/src/hid_core/resources/palma/palma.cpp b/src/hid_core/resources/palma/palma.cpp index be3d3c0edd..9210b456e2 100644 --- a/src/hid_core/resources/palma/palma.cpp +++ b/src/hid_core/resources/palma/palma.cpp @@ -62,7 +62,7 @@ Result Palma::GetPalmaOperationInfo(const PalmaConnectionHandle& handle, } operation_type = static_cast(operation.operation); std::memcpy(out_data.data(), operation.data.data(), - std::min(out_data.size(), operation.data.size())); + (std::min)(out_data.size(), operation.data.size())); return ResultSuccess; } diff --git a/src/hid_core/resources/touch_screen/gesture_handler.cpp b/src/hid_core/resources/touch_screen/gesture_handler.cpp index 4fcaf6ecf1..6309019796 100644 --- a/src/hid_core/resources/touch_screen/gesture_handler.cpp +++ b/src/hid_core/resources/touch_screen/gesture_handler.cpp @@ -16,7 +16,7 @@ GestureHandler::~GestureHandler() {} void GestureHandler::SetTouchState(std::span touch_state, u32 count, s64 timestamp) { gesture = {}; - gesture.active_points = std::min(MaxPoints, static_cast(count)); + gesture.active_points = (std::min)(MaxPoints, static_cast(count)); for (size_t id = 0; id < gesture.active_points; ++id) { const auto& [active_x, active_y] = touch_state[id].position; diff --git a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp index 79ddaa4dfa..51b94b2466 100644 --- a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp +++ 
b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp @@ -25,8 +25,8 @@ TouchResource::~TouchResource() { }; Result TouchResource::ActivateTouch() { - if (global_ref_counter == std::numeric_limits::max() - 1 || - touch_ref_counter == std::numeric_limits::max() - 1) { + if (global_ref_counter == (std::numeric_limits::max)() - 1 || + touch_ref_counter == (std::numeric_limits::max)() - 1) { return ResultTouchOverflow; } @@ -91,8 +91,8 @@ Result TouchResource::ActivateTouch(u64 aruid) { } Result TouchResource::ActivateGesture() { - if (global_ref_counter == std::numeric_limits::max() - 1 || - gesture_ref_counter == std::numeric_limits::max() - 1) { + if (global_ref_counter == (std::numeric_limits::max)() - 1 || + gesture_ref_counter == (std::numeric_limits::max)() - 1) { return ResultGestureOverflow; } diff --git a/src/input_common/drivers/mouse.cpp b/src/input_common/drivers/mouse.cpp index 4af2dd36f5..34bf877bf5 100644 --- a/src/input_common/drivers/mouse.cpp +++ b/src/input_common/drivers/mouse.cpp @@ -102,11 +102,11 @@ void Mouse::UpdateStickInput() { SetAxis(identifier, mouse_axis_y, -last_mouse_change.y); // Decay input over time - const float clamped_length = std::min(1.0f, length); + const float clamped_length = (std::min)(1.0f, length); const float decay_strength = Settings::values.mouse_panning_decay_strength.GetValue(); const float decay = 1 - clamped_length * clamped_length * decay_strength * 0.01f; const float min_decay = Settings::values.mouse_panning_min_decay.GetValue(); - const float clamped_decay = std::min(1 - min_decay / 100.0f, decay); + const float clamped_decay = (std::min)(1 - min_decay / 100.0f, decay); last_mouse_change *= clamped_decay; } diff --git a/src/input_common/drivers/sdl_driver.cpp b/src/input_common/drivers/sdl_driver.cpp index 51169c6f2b..972abec9fe 100644 --- a/src/input_common/drivers/sdl_driver.cpp +++ b/src/input_common/drivers/sdl_driver.cpp @@ -120,7 +120,7 @@ public: f32 low_frequency_scale = 1.0f; if 
(vibration.low_frequency > low_start_sensitivity_limit) { low_frequency_scale = - std::max(1.0f - (vibration.low_frequency - low_start_sensitivity_limit) / + (std::max)(1.0f - (vibration.low_frequency - low_start_sensitivity_limit) / low_width_sensitivity_limit, 0.3f); } @@ -129,7 +129,7 @@ public: f32 high_frequency_scale = 1.0f; if (vibration.high_frequency > high_start_sensitivity_limit) { high_frequency_scale = - std::max(1.0f - (vibration.high_frequency - high_start_sensitivity_limit) / + (std::max)(1.0f - (vibration.high_frequency - high_start_sensitivity_limit) / high_width_sensitivity_limit, 0.3f); } diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index d483cd3490..df1819904b 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -615,8 +615,8 @@ CalibrationConfigurationJob::CalibrationConfigurationJob( } LOG_DEBUG(Input, "Current touch: {} {}", data.touch[0].x, data.touch[0].y); - min_x = std::min(min_x, static_cast(data.touch[0].x)); - min_y = std::min(min_y, static_cast(data.touch[0].y)); + min_x = (std::min)(min_x, static_cast(data.touch[0].x)); + min_y = (std::min)(min_y, static_cast(data.touch[0].y)); if (current_status == Status::Ready) { // First touch - min data (min_x/min_y) current_status = Status::Stage1Completed; diff --git a/src/input_common/helpers/joycon_protocol/calibration.cpp b/src/input_common/helpers/joycon_protocol/calibration.cpp index 1300ecaf53..057bf29f71 100644 --- a/src/input_common/helpers/joycon_protocol/calibration.cpp +++ b/src/input_common/helpers/joycon_protocol/calibration.cpp @@ -140,8 +140,8 @@ Common::Input::DriverResult CalibrationProtocol::GetRingCalibration(RingCalibrat ring_data_min = current_value - DefaultRingRange; ring_data_default = current_value; } - ring_data_max = std::max(ring_data_max, current_value); - ring_data_min = std::min(ring_data_min, current_value); + ring_data_max = (std::max)(ring_data_max, 
current_value); + ring_data_min = (std::min)(ring_data_min, current_value); calibration = { .default_value = ring_data_default, .max_value = ring_data_max, diff --git a/src/input_common/helpers/joycon_protocol/nfc.cpp b/src/input_common/helpers/joycon_protocol/nfc.cpp index db83f9ef48..bfdaa74a62 100644 --- a/src/input_common/helpers/joycon_protocol/nfc.cpp +++ b/src/input_common/helpers/joycon_protocol/nfc.cpp @@ -327,7 +327,7 @@ Common::Input::DriverResult NfcProtocol::IsTagInRange(TagFoundData& data, (output.mcu_data[6] != 0x09 && output.mcu_data[6] != 0x04)); data.type = output.mcu_data[12]; - data.uuid_size = std::min(output.mcu_data[14], static_cast(sizeof(TagUUID))); + data.uuid_size = (std::min)(output.mcu_data[14], static_cast(sizeof(TagUUID))); memcpy(data.uuid.data(), output.mcu_data.data() + 15, data.uuid.size()); return Common::Input::DriverResult::Success; @@ -433,7 +433,7 @@ Common::Input::DriverResult NfcProtocol::WriteAmiiboData(const TagUUID& tag_uuid // Send Data. Nfc buffer size is 31, Send the data in smaller packages while (current_position < buffer.size() && tries++ < timeout_limit) { const std::size_t next_position = - std::min(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); + (std::min)(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); const std::size_t block_size = next_position - current_position; const bool is_last_packet = block_size < sizeof(NFCRequestState::raw_data); @@ -479,7 +479,7 @@ Common::Input::DriverResult NfcProtocol::GetMifareData( // Send data request. 
Nfc buffer size is 31, Send the data in smaller packages while (current_position < buffer.size() && tries++ < timeout_limit) { const std::size_t next_position = - std::min(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); + (std::min)(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); const std::size_t block_size = next_position - current_position; const bool is_last_packet = block_size < sizeof(NFCRequestState::raw_data); @@ -559,7 +559,7 @@ Common::Input::DriverResult NfcProtocol::WriteMifareData( // Send data request. Nfc buffer size is 31, Send the data in smaller packages while (current_position < buffer.size() && tries++ < timeout_limit) { const std::size_t next_position = - std::min(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); + (std::min)(current_position + sizeof(NFCRequestState::raw_data), buffer.size()); const std::size_t block_size = next_position - current_position; const bool is_last_packet = block_size < sizeof(NFCRequestState::raw_data); @@ -731,7 +731,7 @@ Common::Input::DriverResult NfcProtocol::SendWriteDataAmiiboRequest(MCUCommandRe u8 block_id, bool is_last_packet, std::span data) { - const auto data_size = std::min(data.size(), sizeof(NFCRequestState::raw_data)); + const auto data_size = (std::min)(data.size(), sizeof(NFCRequestState::raw_data)); NFCRequestState request{ .command_argument = NFCCommand::WriteNtag, .block_id = block_id, @@ -754,7 +754,7 @@ Common::Input::DriverResult NfcProtocol::SendWriteDataAmiiboRequest(MCUCommandRe Common::Input::DriverResult NfcProtocol::SendReadDataMifareRequest(MCUCommandResponse& output, u8 block_id, bool is_last_packet, std::span data) { - const auto data_size = std::min(data.size(), sizeof(NFCRequestState::raw_data)); + const auto data_size = (std::min)(data.size(), sizeof(NFCRequestState::raw_data)); NFCRequestState request{ .command_argument = NFCCommand::Mifare, .block_id = block_id, diff --git 
a/src/input_common/helpers/joycon_protocol/rumble.cpp b/src/input_common/helpers/joycon_protocol/rumble.cpp index 9fd0b84708..db3420dc0b 100644 --- a/src/input_common/helpers/joycon_protocol/rumble.cpp +++ b/src/input_common/helpers/joycon_protocol/rumble.cpp @@ -29,7 +29,7 @@ Common::Input::DriverResult RumbleProtocol::SendVibration(const VibrationValue& // Protect joycons from damage from strong vibrations const f32 clamp_amplitude = - 1.0f / std::max(1.0f, vibration.high_amplitude + vibration.low_amplitude); + 1.0f / (std::max)(1.0f, vibration.high_amplitude + vibration.low_amplitude); const u16 encoded_high_frequency = EncodeHighFrequency(vibration.high_frequency); const u8 encoded_high_amplitude = diff --git a/src/network/room.cpp b/src/network/room.cpp index 99dcf0c3b4..1a3ad75d2b 100644 --- a/src/network/room.cpp +++ b/src/network/room.cpp @@ -951,7 +951,7 @@ void Room::RoomImpl::HandleChatPacket(const ENetEvent* event) { } // Limit the size of chat messages to MaxMessageSize - message.resize(std::min(static_cast(message.size()), MaxMessageSize)); + message.resize((std::min)(static_cast(message.size()), MaxMessageSize)); Packet out_packet; out_packet.Write(static_cast(IdChatMessage)); diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 3919d63268..8cd20a2f5c 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -123,7 +123,7 @@ Id RegAlloc::Alloc(bool is_long) { if (use[reg]) { continue; } - num_regs = std::max(num_regs, reg + 1); + num_regs = (std::max)(num_regs, reg + 1); use[reg] = true; Id ret{}; ret.is_valid.Assign(1); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 49397c9b2e..99ed4cbc19 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -39,7 +39,7 
@@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin // which may be overwritten by the result of the addition if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c - constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + constexpr u32 s32_max{static_cast((std::numeric_limits::max)())}; const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; const auto negative_result{fmt::format("int({})::max())), + ctx.OpBitcast(ctx.F32[1], ctx.Const((std::numeric_limits::max)())), ctx.f32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 960bdea6f1..bf47d4ee96 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -42,7 +42,7 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { SetSignFlag(ctx, inst, result); if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c - constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + constexpr u32 s32_max{static_cast((std::numeric_limits::max)())}; const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 388ddce2c8..745dead6c4 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -1593,7 +1593,7 @@ void 
EmitContext::DefineOutputs(const IR::Program& program) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } if (profile.max_user_clip_distances > 0) { - const u32 used{std::min(profile.max_user_clip_distances, 8u)}; + const u32 used{(std::min)(profile.max_user_clip_distances, 8u)}; const std::array zero{f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value}; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 47111a0501..5afe8cbb14 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -73,7 +73,7 @@ constexpr auto ENCODINGS{SortedEncodings()}; constexpr int WidestLeftBits() { int bits{64}; for (const InstEncoding& encoding : ENCODINGS) { - bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); + bits = (std::min)(bits, std::countr_zero(encoding.mask_value.mask)); } return 64 - bits; } @@ -87,7 +87,7 @@ constexpr size_t ToFastLookupIndex(u64 value) { constexpr size_t FastLookupSize() { size_t max_width{}; for (const InstEncoding& encoding : ENCODINGS) { - max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); + max_width = (std::max)(max_width, ToFastLookupIndex(encoding.mask_value.mask)); } return max_width + 1; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 85c18d9422..21d3c2fe53 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -60,28 +60,28 @@ std::pair ClampBounds(DestFormat format, bool is_signed) { if (is_signed) { switch (format) { case DestFormat::I16: - return 
{static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; case DestFormat::I32: - return {static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; case DestFormat::I64: - return {static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; default: break; } } else { switch (format) { case DestFormat::I16: - return {static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; case DestFormat::I32: - return {static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; case DestFormat::I64: - return {static_cast(std::numeric_limits::max()), - static_cast(std::numeric_limits::min())}; + return {static_cast((std::numeric_limits::max)()), + static_cast((std::numeric_limits::min)())}; default: break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index a2dc0f4a6e..9631fd9dfe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -114,9 +114,9 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { // Only negate if the input isn't the lowest value IR::U1 is_least; if (src_bitsize == 64) { - is_least = v.ir.IEqual(src, 
v.ir.Imm64(std::numeric_limits::min())); + is_least = v.ir.IEqual(src, v.ir.Imm64((std::numeric_limits::min)())); } else if (src_bitsize == 32) { - is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits::min())); + is_least = v.ir.IEqual(src, v.ir.Imm32((std::numeric_limits::min)())); } else { const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; is_least = v.ir.IEqual(src, least_value); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 321ea625bc..a9559b0d7a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -336,7 +336,7 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b } result.stage = Stage::VertexB; result.info = vertex_a.info; - result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + result.local_memory_size = (std::max)(vertex_a.local_memory_size, vertex_b.local_memory_size); result.info.loads.mask |= vertex_b.info.loads.mask; result.info.stores.mask |= vertex_b.info.stores.mask; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cb82a326c1..1fa39034a8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -509,7 +509,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { u32 element_size = GetElementSize(info.used_constant_buffer_types, inst.GetOpcode()); u32& size{info.constant_buffer_used_sizes[index.U32()]}; if (offset.IsImmediate()) { - size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); + size = Common::AlignUp((std::max)(size, offset.U32() + element_size), 16u); } else { size = 0x10'000; } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 
b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 12d7b2d7fa..160c1aaea5 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -905,7 +905,7 @@ void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { } void FoldDriverConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst, u32 which_bank, - u32 offset_start = 0, u32 offset_end = std::numeric_limits::max()) { + u32 offset_start = 0, u32 offset_end = (std::numeric_limits::max)()) { const IR::Value bank{inst.Arg(0)}; const IR::Value offset{inst.Arg(1)}; if (!bank.IsImmediate() || !offset.IsImmediate()) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index afd880526a..9f04c0afaf 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -517,11 +517,11 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_ case TexturePixelFormat::A8B8G8R8_SNORM: case TexturePixelFormat::R8G8_SNORM: case TexturePixelFormat::R8_SNORM: - return 1.f / std::numeric_limits::max(); + return 1.f / (std::numeric_limits::max)(); case TexturePixelFormat::R16G16B16A16_SNORM: case TexturePixelFormat::R16G16_SNORM: case TexturePixelFormat::R16_SNORM: - return 1.f / std::numeric_limits::max(); + return 1.f / (std::numeric_limits::max)(); default: throw InvalidArgument("Invalid texture pixel format"); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e0f7f82fbe..3c2473266a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -373,7 +373,7 @@ else() set_source_files_properties(vulkan_common/vma.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-unused-variable;-Wno-unused-parameter;-Wno-missing-field-initializers") # Get around GCC failing with intrinsics in Debug - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND 
CMAKE_BUILD_TYPE MATCHES "Debug") + if (CXX_GCC AND CMAKE_BUILD_TYPE MATCHES "Debug") set_source_files_properties(host1x/vic.cpp PROPERTIES COMPILE_OPTIONS "-O2") endif() endif() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0cd6861b6d..94ef1a48df 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -36,14 +36,14 @@ BufferCache

::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R const s64 device_local_memory = static_cast(runtime.GetDeviceLocalMemory()); const s64 min_spacing_expected = device_local_memory - 1_GiB; const s64 min_spacing_critical = device_local_memory - 512_MiB; - const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); + const s64 mem_threshold = (std::min)(device_local_memory, TARGET_THRESHOLD); const s64 min_vacancy_expected = (6 * mem_threshold) / 10; const s64 min_vacancy_critical = (2 * mem_threshold) / 10; minimum_memory = static_cast( - std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), + (std::max)((std::min)(device_local_memory - min_vacancy_expected, min_spacing_expected), DEFAULT_EXPECTED_MEMORY)); critical_memory = static_cast( - std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), + (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); } @@ -553,8 +553,8 @@ void BufferCache

::CommitAsyncFlushesHigh() { ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { const DAddr buffer_start = buffer.CpuAddr(); const DAddr buffer_end = buffer_start + buffer.SizeBytes(); - const DAddr new_start = std::max(buffer_start, device_addr); - const DAddr new_end = std::min(buffer_end, device_addr + size); + const DAddr new_start = (std::max)(buffer_start, device_addr); + const DAddr new_end = (std::min)(buffer_end, device_addr + size); memory_tracker.ForEachDownloadRange( new_start, new_end - new_start, false, [&](u64 device_addr_out, u64 range_size) { @@ -574,7 +574,7 @@ void BufferCache

::CommitAsyncFlushesHigh() { constexpr u64 align = 64ULL; constexpr u64 mask = ~(align - 1ULL); total_size_bytes += (new_size + align - 1) & mask; - largest_copy = std::max(largest_copy, new_size); + largest_copy = (std::max)(largest_copy, new_size); }; gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, @@ -729,8 +729,8 @@ void BufferCache

::BindHostVertexBuffers() { } flags[Dirty::VertexBuffer0 + index] = false; - host_bindings.min_index = std::min(host_bindings.min_index, index); - host_bindings.max_index = std::max(host_bindings.max_index, index); + host_bindings.min_index = (std::min)(host_bindings.min_index, index); + host_bindings.max_index = (std::max)(host_bindings.max_index, index); any_valid = true; } @@ -789,7 +789,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = channel_state->uniform_buffers[stage][index]; const DAddr device_addr = binding.device_addr; - const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); + const u32 size = (std::min)(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -956,7 +956,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = - std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); + (std::min)(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.device_addr, size); const u32 offset = buffer.Offset(binding.device_addr); @@ -1090,7 +1090,7 @@ void BufferCache

::UpdateIndexBuffer() { const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); const u32 draw_size = (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); - const u32 size = std::min(address_size, draw_size); + const u32 size = (std::min)(address_size, draw_size); if (size == 0 || !device_addr) { channel_state->index_buffer = NULL_BINDING; return; @@ -1459,7 +1459,7 @@ bool BufferCache

::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 si .size = range_size, }); total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); + largest_copy = (std::max)(largest_copy, range_size); }); if (total_size_bytes == 0) { return true; @@ -1594,7 +1594,7 @@ void BufferCache

::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 constexpr u64 align = 64ULL; constexpr u64 mask = ~(align - 1ULL); total_size_bytes += (new_size + align - 1) & mask; - largest_copy = std::max(largest_copy, new_size); + largest_copy = (std::max)(largest_copy, new_size); }; gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); @@ -1715,7 +1715,7 @@ Binding BufferCache

::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, // cbufs, which do not store the sizes adjacent to the addresses, so use the fully // mapped buffer size for now. const u32 memory_layout_size = static_cast(gpu_memory->GetMemoryLayoutSize(gpu_addr)); - return std::min(memory_layout_size, static_cast(8_MiB)); + return (std::min)(memory_layout_size, static_cast(8_MiB)); }(); // Alignment only applies to the offset of the buffer const u32 alignment = runtime.GetStorageBufferAlignment(); diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index c95eed1f62..fe68bdbf23 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -230,7 +230,7 @@ private: std::size_t remaining_size{size}; std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; - u64 begin = std::numeric_limits::max(); + u64 begin = (std::numeric_limits::max)(); u64 end = 0; while (remaining_size > 0) { const std::size_t copy_amount{ @@ -240,8 +240,8 @@ private: auto [new_begin, new_end] = func(manager, page_offset, copy_amount); if (new_begin != 0 || new_end != 0) { const u64 base_address = page_index << HIGHER_PAGE_BITS; - begin = std::min(new_begin + base_address, begin); - end = std::max(new_end + base_address, end); + begin = (std::min)(new_begin + base_address, begin); + end = (std::max)(new_end + base_address, end); } }; if (manager) { diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 3db9d8b423..8dc073240e 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -181,7 +181,7 @@ public: static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { constexpr size_t number_bits = sizeof(u64) * 8; - const size_t limit_page_end = number_bits - std::min(page_end, number_bits); + const size_t limit_page_end = 
number_bits - (std::min)(page_end, number_bits); u64 bits = (word >> page_start) << page_start; bits = (bits << limit_page_end) >> limit_page_end; return bits; @@ -206,11 +206,11 @@ public: auto [start_word, start_page] = GetWordPage(start); auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); const size_t num_words = NumWords(); - start_word = std::min(start_word, num_words); - end_word = std::min(end_word, num_words); + start_word = (std::min)(start_word, num_words); + end_word = (std::min)(end_word, num_words); const size_t diff = end_word - start_word; end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; - end_word = std::min(end_word, num_words); + end_word = (std::min)(end_word, num_words); end_page += diff * PAGES_PER_WORD; constexpr u64 base_mask{~0ULL}; for (size_t word_index = start_word; word_index < end_word; word_index++) { @@ -382,7 +382,7 @@ public: const std::span state_words = words.template Span(); [[maybe_unused]] const std::span untracked_words = words.template Span(); - u64 begin = std::numeric_limits::max(); + u64 begin = (std::numeric_limits::max)(); u64 end = 0; IterateWords(offset, size, [&](size_t index, u64 mask) { if constexpr (type == Type::GPU) { @@ -395,7 +395,7 @@ public: const u64 local_page_begin = std::countr_zero(word); const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); const u64 page_index = index * PAGES_PER_WORD; - begin = std::min(begin, page_index + local_page_begin); + begin = (std::min)(begin, page_index + local_page_begin); end = page_index + local_page_end; }); static constexpr std::pair EMPTY{0, 0}; diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 7480d60d13..038c5b8fd1 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -73,7 +73,7 @@ public: } protected: - static constexpr size_t UNSET_CHANNEL{std::numeric_limits::max()}; + static constexpr size_t 
UNSET_CHANNEL{(std::numeric_limits::max)()}; P* channel_state; size_t current_channel_id{UNSET_CHANNEL}; diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 54631ee6cc..93de389a90 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -37,7 +37,7 @@ public: ConsumeSinkImpl(); } - std::bitset::max()> execution_mask{}; + std::bitset<(std::numeric_limits::max)()> execution_mask{}; std::vector> method_sink{}; bool current_dirty{}; GPUVAddr current_dma_segment; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index e5cc04ec4f..e6f34c7cca 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -30,7 +30,7 @@ void State::ProcessExec(const bool is_linear_) { } void State::ProcessData(const u32 data, const bool is_last_call) { - const u32 sub_copy_size = std::min(4U, copy_size - write_offset); + const u32 sub_copy_size = (std::min)(4U, copy_size - write_offset); std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); write_offset += sub_copy_size; if (!is_last_call) { @@ -58,7 +58,7 @@ void State::ProcessData(std::span read_buffer) { u32 x_elements = regs.line_length_in; u32 x_offset = regs.dest.x; const u32 bpp_shift = Common::FoldRight( - 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + 4U, [](u32 x, u32 y) { return (std::min)(x, static_cast(std::countr_zero(y))); }, width, x_elements, x_offset, static_cast(address)); width >>= bpp_shift; x_elements >>= bpp_shift; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0d47b032c8..c63f908bcc 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -245,15 +245,15 @@ u32 Maxwell3D::GetMaxCurrentVertices() { } const auto& attribute = regs.vertex_attrib_format[index]; if (attribute.constant) { - num_vertices 
= std::max(num_vertices, 1U); + num_vertices = (std::max)(num_vertices, 1U); continue; } const auto& limit = regs.vertex_stream_limits[index]; const GPUVAddr gpu_addr_begin = array.Address(); const GPUVAddr gpu_addr_end = limit.Address() + 1; const u32 address_size = static_cast(gpu_addr_end - gpu_addr_begin); - num_vertices = std::max( - num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + num_vertices = (std::max)( + num_vertices, address_size / (std::max)(attribute.SizeInBytes(), array.stride.Value())); break; } return num_vertices; @@ -262,9 +262,9 @@ u32 Maxwell3D::GetMaxCurrentVertices() { size_t Maxwell3D::EstimateIndexBufferSize() { GPUVAddr start_address = regs.index_buffer.StartAddress(); GPUVAddr end_address = regs.index_buffer.EndAddress(); - static constexpr std::array max_sizes = {std::numeric_limits::max(), - std::numeric_limits::max(), - std::numeric_limits::max()}; + static constexpr std::array max_sizes = {(std::numeric_limits::max)(), + (std::numeric_limits::max)(), + (std::numeric_limits::max)()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); const size_t cap{GetMaxCurrentVertices() * 4 * byte_size}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 6b4f1c570e..ce0434f3d7 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1180,11 +1180,11 @@ public: } f32 GetX() const { - return std::max(0.0f, translate_x - std::fabs(scale_x)); + return (std::max)(0.0f, translate_x - std::fabs(scale_x)); } f32 GetY() const { - return std::max(0.0f, translate_y - std::fabs(scale_y)); + return (std::max)(0.0f, translate_y - std::fabs(scale_y)); } f32 GetWidth() const { @@ -3091,7 +3091,7 @@ public: } struct DirtyState { - using Flags = std::bitset::max()>; + using Flags = std::bitset<(std::numeric_limits::max)()>; using Table = std::array; using Tables = std::array; 
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a4b2c1098b..73a62db37d 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -198,7 +198,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { u32 bpp_shift = 0U; if (!is_remapping) { bpp_shift = Common::FoldRight( - 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + 4U, [](u32 x, u32 y) { return (std::min)(x, static_cast(std::countr_zero(y))); }, width, x_elements, x_offset, static_cast(regs.offset_in)); width >>= bpp_shift; x_elements >>= bpp_shift; @@ -261,7 +261,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { u32 bpp_shift = 0U; if (!is_remapping) { bpp_shift = Common::FoldRight( - 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + 4U, [](u32 x, u32 y) { return (std::min)(x, static_cast(std::countr_zero(y))); }, width, x_elements, x_offset, static_cast(regs.offset_out)); width >>= bpp_shift; x_elements >>= bpp_shift; @@ -312,7 +312,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { u32 bpp_shift = 0U; if (!is_remapping) { bpp_shift = Common::FoldRight( - 4U, [](u32 x, u32 y) { return std::min(x, static_cast(std::countr_zero(y))); }, + 4U, [](u32 x, u32 y) { return (std::min)(x, static_cast(std::countr_zero(y))); }, src_width, dst_width, x_elements, src_x_offset, dst_x_offset, static_cast(regs.offset_in), static_cast(regs.offset_out)); src_width >>= bpp_shift; diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp index 2419b56321..785d209f98 100644 --- a/src/video_core/engines/sw_blitter/converter.cpp +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ -771,7 +771,7 @@ private: }; const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) { constexpr size_t fp32_mantissa_bits = 23; - u32 tmp_value = Common::BitCast(std::max(base_value, 0.0f)); + u32 tmp_value = 
Common::BitCast((std::max)(base_value, 0.0f)); size_t shift_towards = fp32_mantissa_bits - mantissa; return tmp_value >> shift_towards; }; diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 782d11d725..a0b13cbffc 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -117,7 +117,7 @@ std::span H264::ComposeFrame() { (current_context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); u32 max_num_ref_frames = - std::max(std::max(current_context.h264_parameter_set.num_refidx_l0_default_active, + (std::max)((std::max)(current_context.h264_parameter_set.num_refidx_l0_default_active, current_context.h264_parameter_set.num_refidx_l1_default_active) + 1, 4); diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index c70d0a506f..7b3dbd7642 100644 --- a/src/video_core/host1x/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -228,10 +228,10 @@ constexpr std::array map_lut{ std::size_t index{}; if (old_prob * 2 <= 0xff) { - index = static_cast(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1)); + index = static_cast((std::max)(0, RecenterNonNeg(new_prob, old_prob) - 1)); } else { index = static_cast( - std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); + (std::max)(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1)); } return static_cast(map_lut[index]); diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 18b3077f9a..9c33370337 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -201,8 +201,8 @@ void Vic::ReadProgressiveY8__V8U8_N420(const SlotStruct& slot, slot_surface.resize_destructive(out_luma_width * out_luma_height); - const auto in_luma_width{std::min(frame->GetWidth(), static_cast(out_luma_width))}; - const auto in_luma_height{std::min(frame->GetHeight(), static_cast(out_luma_height))}; + const auto 
in_luma_width{(std::min)(frame->GetWidth(), static_cast(out_luma_width))}; + const auto in_luma_height{(std::min)(frame->GetHeight(), static_cast(out_luma_height))}; const auto in_luma_stride{frame->GetStride(0)}; const auto in_chroma_stride{frame->GetStride(1)}; @@ -425,9 +425,9 @@ void Vic::ReadInterlacedY8__V8U8_N420(const SlotStruct& slot, std::spanGetWidth(), static_cast(out_luma_width))}; + const auto in_luma_width{(std::min)(frame->GetWidth(), static_cast(out_luma_width))}; [[maybe_unused]] const auto in_luma_height{ - std::min(frame->GetHeight(), static_cast(out_luma_height))}; + (std::min)(frame->GetHeight(), static_cast(out_luma_height))}; const auto in_luma_stride{frame->GetStride(0)}; [[maybe_unused]] const auto in_chroma_width{(frame->GetWidth() + 1) / 2}; @@ -543,15 +543,15 @@ void Vic::Blend(const ConfigStruct& config, const SlotStruct& slot) { auto rect_top{add_one(config.output_config.target_rect_top.Value())}; auto rect_bottom{add_one(config.output_config.target_rect_bottom.Value())}; - rect_left = std::max(rect_left, dest_left); - rect_right = std::min(rect_right, dest_right); - rect_top = std::max(rect_top, dest_top); - rect_bottom = std::min(rect_bottom, dest_bottom); + rect_left = (std::max)(rect_left, dest_left); + rect_right = (std::min)(rect_right, dest_right); + rect_top = (std::max)(rect_top, dest_top); + rect_bottom = (std::min)(rect_bottom, dest_bottom); - source_left = std::max(source_left, rect_left); - source_right = std::min(source_right, rect_right); - source_top = std::max(source_top, rect_top); - source_bottom = std::min(source_bottom, rect_bottom); + source_left = (std::max)(source_left, rect_left); + source_right = (std::min)(source_right, rect_right); + source_top = (std::max)(source_top, rect_top); + source_bottom = (std::min)(source_bottom, rect_bottom); if (source_left >= source_right || source_top >= source_bottom) { return; @@ -562,14 +562,14 @@ void Vic::Blend(const ConfigStruct& config, const SlotStruct& slot) { 1}; 
const auto in_surface_width{slot.surface_config.slot_surface_width + 1}; - source_bottom = std::min(source_bottom, out_surface_height); - source_right = std::min(source_right, out_surface_width); + source_bottom = (std::min)(source_bottom, out_surface_height); + source_right = (std::min)(source_right, out_surface_width); // TODO Alpha blending. No games I've seen use more than a single surface or supply an alpha // below max, so it's ignored for now. if (!slot.color_matrix.matrix_enable) { - const auto copy_width = std::min(source_right - source_left, rect_right - rect_left); + const auto copy_width = (std::min)(source_right - source_left, rect_right - rect_left); for (u32 y = source_top; y < source_bottom; y++) { const auto dst_line = y * out_surface_width; @@ -818,8 +818,8 @@ void Vic::WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) { const auto out_chroma_stride = Common::AlignUp(out_chroma_width * BytesPerPixel * 2, 0x10); const auto out_chroma_size = out_chroma_height * out_chroma_stride; - surface_width = std::min(surface_width, out_luma_width); - surface_height = std::min(surface_height, out_luma_height); + surface_width = (std::min)(surface_width, out_luma_width); + surface_height = (std::min)(surface_height, out_luma_height); [[maybe_unused]] auto DecodeLinear = [&](std::span out_luma, std::span out_chroma) { for (u32 y = 0; y < surface_height; ++y) { @@ -1089,8 +1089,8 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config) { const auto out_luma_stride = Common ::AlignUp(out_luma_width * BytesPerPixel, 0x10); const auto out_luma_size = out_luma_height * out_luma_stride; - surface_width = std::min(surface_width, out_luma_width); - surface_height = std::min(surface_height, out_luma_height); + surface_width = (std::min)(surface_width, out_luma_width); + surface_height = (std::min)(surface_height, out_luma_height); [[maybe_unused]] auto DecodeLinear = [&](std::span out_buffer) { for (u32 y = 0; y < surface_height; y++) { 
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index fb529f88b7..328abd0ba4 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -301,7 +301,7 @@ private: const u32 indirect_words = 5 + padding; const std::size_t first_draw = start_indirect; const std::size_t effective_draws = end_indirect - start_indirect; - const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws); + const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); for (std::size_t index = first_draw; index < last_draw; index++) { const std::size_t base = index * indirect_words + 5; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index ffafc48eff..13f0ea8d96 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -293,7 +293,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { return memory.GetPointer(*address); } -#ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining. +#if defined(_MSC_VER) && !defined(__clang__) // no need for gcc / clang but msvc's compiler is more conservative with inlining. 
#pragma inline_recursion(on) #endif @@ -329,7 +329,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si while (remaining_size > 0) { const std::size_t copy_amount{ - std::min(static_cast(used_page_size) - page_offset, remaining_size)}; + (std::min)(static_cast(used_page_size) - page_offset, remaining_size)}; auto entry = GetEntry(current_address); if (entry == EntryType::Mapped) [[likely]] { if constexpr (BOOL_BREAK_MAPPED) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 448624aa99..9be419932c 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -152,7 +152,7 @@ public: PTEKind GetPageKind(GPUVAddr gpu_addr) const; size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, - size_t max_size = std::numeric_limits::max()) const; + size_t max_size = (std::numeric_limits::max)()) const; void FlushCaching(); diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp index 3b03e8d5ac..b9a502577f 100644 --- a/src/video_core/renderer_opengl/blit_image.cpp +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -45,8 +45,8 @@ void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, G static_cast(src_region.start.x) / static_cast(src_size.width), static_cast(src_region.start.y) / static_cast(src_size.height)); - glViewport(std::min(dst_region.start.x, dst_region.end.x), - std::min(dst_region.start.y, dst_region.end.y), + glViewport((std::min)(dst_region.start.x, dst_region.end.x), + (std::min)(dst_region.start.y, dst_region.end.y), std::abs(dst_region.end.x - dst_region.start.x), std::abs(dst_region.end.y - dst_region.start.y)); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ade72e1f95..9d7089c5de 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -248,7 +248,7 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi std::ranges::transform(bindings.strides, buffer_strides.begin(), [](u64 stride) { return static_cast(stride); }); const u32 count = - std::min(static_cast(bindings.buffers.size()), max_attributes - bindings.min_index); + (std::min)(static_cast(bindings.buffers.size()), max_attributes - bindings.min_index); if (has_unified_vertex_buffers) { for (u32 index = 0; index < count; ++index) { Buffer& buffer = *bindings.buffers[index]; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fd471e9795..59d1329d7e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -59,7 +59,7 @@ class BufferCacheRuntime { friend Buffer; public: - static constexpr u8 INVALID_BINDING = std::numeric_limits::max(); + static constexpr u8 INVALID_BINDING = (std::numeric_limits::max)(); explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2ea42abf4b..2746177fab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1266,7 +1266,7 @@ void RasterizerOpenGL::SyncPointState() { oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d->regs.point_size_attribute.enabled); const bool is_rescaling{texture_cache.IsRescaling()}; const float scale = is_rescaling ? 
Settings::values.resolution_info.up_factor : 1.0f; - glPointSize(std::max(1.0f, maxwell3d->regs.point_size * scale)); + glPointSize((std::max)(1.0f, maxwell3d->regs.point_size * scale)); } void RasterizerOpenGL::SyncLineState() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index edf0bdd2f1..b6ce57b819 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -617,7 +617,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } std::unique_ptr ShaderCache::CreateWorkers() const { - return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + return std::make_unique((std::max)(std::thread::hardware_concurrency(), 2U) - 1, "GlShaderBuilder", [this] { return Context{emu_window}; }); } diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp index 5767d6b7de..d9535c277d 100644 --- a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -68,7 +68,7 @@ size_t StagingBuffers::RequestBuffer(size_t requested_size) { std::optional StagingBuffers::FindBuffer(size_t requested_size) { size_t known_unsignaled_index = current_sync_index + 1; - size_t smallest_buffer = std::numeric_limits::max(); + size_t smallest_buffer = (std::numeric_limits::max)(); std::optional found; const size_t num_buffers = allocs.size(); for (size_t index = 0; index < num_buffers; ++index) { @@ -88,7 +88,7 @@ std::optional StagingBuffers::FindBuffer(size_t requested_size) { if (!alloc.sync.IsSignaled()) { // Since this fence hasn't been signaled, it's safe to assume all later // fences haven't been signaled either - known_unsignaled_index = std::min(known_unsignaled_index, alloc.sync_index); + known_unsignaled_index = (std::min)(known_unsignaled_index, alloc.sync_index); continue; } alloc.sync.Release(); 
@@ -120,7 +120,7 @@ std::pair, size_t> StreamBuffer::Request(size_t size) noexcept { used_iterator = iterator; for (size_t region = Region(free_iterator) + 1, - region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); + region_end = (std::min)(Region(iterator + size) + 1, NUM_SYNCS); region < region_end; ++region) { glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); fences[region].Release(); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 19bcf3f355..4027807da1 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -79,7 +79,7 @@ enum : u8 { Last }; -static_assert(Last <= std::numeric_limits::max()); +static_assert(Last <= (std::numeric_limits::max)()); } // namespace Dirty diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index be14494ca5..8b1737ff51 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -717,7 +717,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_type = tuple.type; } const int max_host_mip_levels = std::bit_width(info.size.width); - gl_num_levels = std::min(info.resources.levels, max_host_mip_levels); + gl_num_levels = (std::min)(info.resources.levels, max_host_mip_levels); texture = MakeImage(info, gl_internal_format, gl_num_levels); current_texture = texture.handle; if (runtime->device.HasDebuggingToolAttached()) { @@ -742,8 +742,8 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset, glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - u32 current_row_length = std::numeric_limits::max(); - u32 current_image_height = std::numeric_limits::max(); + u32 current_row_length = (std::numeric_limits::max)(); + u32 current_image_height = (std::numeric_limits::max)(); for (const VideoCommon::BufferImageCopy& copy : 
copies) { if (copy.image_subresource.base_level >= gl_num_levels) { @@ -788,8 +788,8 @@ void Image::DownloadMemory(std::span buffer_handles, std::span b glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); glPixelStorei(GL_PACK_ALIGNMENT, 1); - u32 current_row_length = std::numeric_limits::max(); - u32 current_image_height = std::numeric_limits::max(); + u32 current_row_length = (std::numeric_limits::max)(); + u32 current_image_height = (std::numeric_limits::max)(); for (const VideoCommon::BufferImageCopy& copy : copies) { if (copy.image_subresource.base_level >= gl_num_levels) { @@ -1033,10 +1033,10 @@ void Image::Scale(bool up_scale) { const GLuint draw_fbo = runtime->rescale_draw_fbos[fbo_index].handle; for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 level = 0; level < info.resources.levels; ++level) { - const u32 src_level_width = std::max(1u, src_width >> level); - const u32 src_level_height = std::max(1u, src_height >> level); - const u32 dst_level_width = std::max(1u, dst_width >> level); - const u32 dst_level_height = std::max(1u, dst_height >> level); + const u32 src_level_width = (std::max)(1u, src_width >> level); + const u32 src_level_height = (std::max)(1u, src_height >> level); + const u32 dst_level_width = (std::max)(1u, dst_width >> level); + const u32 dst_level_height = (std::max)(1u, dst_height >> level); glNamedFramebufferTextureLayer(read_fbo, attachment, src_handle, level, layer); glNamedFramebufferTextureLayer(draw_fbo, attachment, dst_handle, level, layer); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 37213912e3..7bfcd6503b 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -340,8 +340,8 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri void BindBlitState(vk::CommandBuffer cmdbuf, const Region2D& dst_region) { const VkOffset2D offset{ - .x = 
std::min(dst_region.start.x, dst_region.end.x), - .y = std::min(dst_region.start.y, dst_region.end.y), + .x = (std::min)(dst_region.start.x, dst_region.end.x), + .y = (std::min)(dst_region.start.y, dst_region.end.y), }; const VkExtent2D extent{ .width = static_cast(std::abs(dst_region.end.x - dst_region.start.x)), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e5e1e3ab63..f61f4456fe 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -573,8 +573,8 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings& bi buffer_handles.push_back(handle); } const u32 device_max = device.GetMaxVertexInputBindings(); - const u32 min_binding = std::min(bindings.min_index, device_max); - const u32 max_binding = std::min(bindings.max_index, device_max); + const u32 min_binding = (std::min)(bindings.min_index, device_max); + const u32 max_binding = (std::min)(bindings.max_index, device_max); const u32 binding_count = max_binding - min_binding; if (binding_count == 0) { return; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dc068c5e52..f5594450c2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -562,7 +562,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (!key.state.dynamic_vertex_input) { - const size_t num_vertex_arrays = std::min( + const size_t num_vertex_arrays = (std::min)( Maxwell::NumVertexArrays, static_cast(device.GetMaxVertexInputBindings())); for (size_t index = 0; index < num_vertex_arrays; ++index) { const bool instanced = key.state.binding_divisors[index] != 0; diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp 
b/src/video_core/renderer_vulkan/vk_present_manager.cpp index 9422110895..2c76584c72 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -86,8 +86,8 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat form }, .extent = { - .width = std::min(frame_width, swapchain_width), - .height = std::min(frame_height, swapchain_height), + .width = (std::min)(frame_width, swapchain_width), + .height = (std::min)(frame_height, swapchain_height), .depth = 1, }, }; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index d6ecc2b65c..89e0b1114e 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -202,8 +202,8 @@ public: }); rasterizer->SyncOperation(std::move(func)); accumulation_since_last_sync = false; - first_accumulation_checkpoint = std::min(first_accumulation_checkpoint, num_slots_used); - last_accumulation_checkpoint = std::max(last_accumulation_checkpoint, num_slots_used); + first_accumulation_checkpoint = (std::min)(first_accumulation_checkpoint, num_slots_used); + last_accumulation_checkpoint = (std::max)(last_accumulation_checkpoint, num_slots_used); } void CloseCounter() override { @@ -311,9 +311,9 @@ public: if (has_multi_queries) { const size_t min_accumulation_limit = - std::min(first_accumulation_checkpoint, num_slots_used); + (std::min)(first_accumulation_checkpoint, num_slots_used); const size_t max_accumulation_limit = - std::max(last_accumulation_checkpoint, num_slots_used); + (std::max)(last_accumulation_checkpoint, num_slots_used); const size_t intermediary_buffer_index = ObtainBuffer(num_slots_used); resolve_buffers.push_back(intermediary_buffer_index); queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index], @@ -332,7 +332,7 @@ public: rasterizer->SyncOperation(std::move(func)); 
AbandonCurrentQuery(); num_slots_used = 0; - first_accumulation_checkpoint = std::numeric_limits::max(); + first_accumulation_checkpoint = (std::numeric_limits::max)(); last_accumulation_checkpoint = 0; accumulation_since_last_sync = has_multi_queries; pending_sync.clear(); @@ -414,7 +414,7 @@ private: size_t start_slot = query->start_slot; for (size_t i = 0; i < banks_set; i++) { auto& the_bank = bank_pool.GetBank(bank_id); - size_t amount = std::min(the_bank.Size() - start_slot, size_slots); + size_t amount = (std::min)(the_bank.Size() - start_slot, size_slots); func(&the_bank, start_slot, amount); bank_id = the_bank.next_bank - 1; start_slot = 0; @@ -431,11 +431,11 @@ private: auto* query = GetQuery(q); ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { auto id_ = bank->GetIndex(); - auto pair = indexer.try_emplace(id_, std::numeric_limits::max(), - std::numeric_limits::min()); + auto pair = indexer.try_emplace(id_, (std::numeric_limits::max)(), + (std::numeric_limits::min)()); auto& current_pair = pair.first->second; - current_pair.first = std::min(current_pair.first, start); - current_pair.second = std::max(current_pair.second, amount + start); + current_pair.first = (std::min)(current_pair.first, start); + current_pair.second = (std::max)(current_pair.second, amount + start); }); } for (auto& cont : indexer) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2d12fc658f..70ca9583f9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -131,8 +131,8 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 s32 max_y = lower_left ? 
(clip_height - src.min_y) : src.max_y.Value(); // Bound to render area - min_y = std::max(min_y, 0); - max_y = std::max(max_y, 0); + min_y = (std::max)(min_y, 0); + max_y = (std::max)(max_y, 0); if (src.enable) { scissor.offset.x = scale_up(src.min_x); @@ -142,8 +142,8 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u3 } else { scissor.offset.x = 0; scissor.offset.y = 0; - scissor.extent.width = std::numeric_limits::max(); - scissor.extent.height = std::numeric_limits::max(); + scissor.extent.width = (std::numeric_limits::max)(); + scissor.extent.height = (std::numeric_limits::max)(); } return scissor; } @@ -380,8 +380,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { VkRect2D default_scissor; default_scissor.offset.x = 0; default_scissor.offset.y = 0; - default_scissor.extent.width = std::numeric_limits::max(); - default_scissor.extent.height = std::numeric_limits::max(); + default_scissor.extent.width = (std::numeric_limits::max)(); + default_scissor.extent.height = (std::numeric_limits::max)(); VkClearRect clear_rect{ .rect = regs.clear_control.use_scissor ? 
GetScissorState(regs, 0, up_scale, down_shift) @@ -393,8 +393,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } clear_rect.rect.extent = VkExtent2D{ - .width = std::min(clear_rect.rect.extent.width, render_area.width), - .height = std::min(clear_rect.rect.extent.height, render_area.height), + .width = (std::min)(clear_rect.rect.extent.width, render_area.width), + .height = (std::min)(clear_rect.rect.extent.height, render_area.height), }; const u32 color_attachment = regs.clear_surface.RT; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 72d5ec35f9..35f497493b 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -31,7 +31,7 @@ size_t GetStreamBufferSize(const Device& device) { VkDeviceSize size{0}; if (device.HasDebuggingToolAttached()) { ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { - size = std::max(size, heap.size); + size = (std::max)(size, heap.size); }); // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be // loaded at the same time in RenderDoc. 
If rebar is supported, this shouldn't be an issue @@ -42,7 +42,7 @@ size_t GetStreamBufferSize(const Device& device) { } else { size = MAX_STREAM_BUFFER_SIZE; } - return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); + return (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); } } // Anonymous namespace @@ -104,7 +104,7 @@ void StagingBufferPool::TickFrame() { StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { if (AreRegionsActive(Region(free_iterator) + 1, - std::min(Region(iterator + size) + 1, NUM_SYNCS))) { + (std::min)(Region(iterator + size) + 1, NUM_SYNCS))) { // Avoid waiting for the previous usages to be free return GetStagingBuffer(size, MemoryUsage::Upload); } @@ -112,7 +112,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), current_tick); used_iterator = iterator; - free_iterator = std::max(free_iterator, iterator + size); + free_iterator = (std::max)(free_iterator, iterator + size); if (iterator + size >= stream_buffer_size) { std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, @@ -170,7 +170,7 @@ std::optional StagingBufferPool::TryGetReservedBuffer(size_t s } } cache_level.iterate_index = std::distance(entries.begin(), it) + 1; - it->tick = deferred ? std::numeric_limits::max() : scheduler.CurrentTick(); + it->tick = deferred ? (std::numeric_limits::max)() : scheduler.CurrentTick(); ASSERT(!it->deferred); it->deferred = deferred; return it->Ref(); @@ -206,7 +206,7 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage .usage = usage, .log2_level = log2, .index = unique_ids++, - .tick = deferred ? std::numeric_limits::max() : scheduler.CurrentTick(), + .tick = deferred ? 
(std::numeric_limits::max)() : scheduler.CurrentTick(), .deferred = deferred, }); return entry.Ref(); @@ -240,7 +240,7 @@ void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) { return scheduler.IsFree(entry.tick); }; const size_t begin_offset = staging.delete_index; - const size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); + const size_t end_offset = (std::min)(begin_offset + deletions_per_tick, old_size); const auto begin = entries.begin() + begin_offset; const auto end = entries.begin() + end_offset; entries.erase(std::remove_if(begin, end, is_deletable), end); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index a78d2113fb..aef726658a 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -70,7 +70,7 @@ enum : u8 { Last, }; -static_assert(Last <= std::numeric_limits::max()); +static_assert(Last <= (std::numeric_limits::max)()); } // namespace Dirty diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a002ca83a0..096b9df087 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -79,15 +79,15 @@ static VkPresentModeKHR ChooseSwapPresentMode(bool has_imm, bool has_mailbox, } VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { - constexpr auto undefined_size{std::numeric_limits::max()}; + constexpr auto undefined_size{(std::numeric_limits::max)()}; if (capabilities.currentExtent.width != undefined_size) { return capabilities.currentExtent; } VkExtent2D extent; - extent.width = std::max(capabilities.minImageExtent.width, - std::min(capabilities.maxImageExtent.width, width)); - extent.height = std::max(capabilities.minImageExtent.height, - std::min(capabilities.maxImageExtent.height, height)); + extent.width = 
(std::max)(capabilities.minImageExtent.width, + (std::min)(capabilities.maxImageExtent.width, width)); + extent.height = (std::max)(capabilities.minImageExtent.height, + (std::min)(capabilities.maxImageExtent.height, height)); return extent; } @@ -172,7 +172,7 @@ void Swapchain::Create( bool Swapchain::AcquireNextImage() { const VkResult result = device.GetLogical().AcquireNextImageKHR( - *swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], + *swapchain, (std::numeric_limits::max)(), *present_semaphores[frame_index], VK_NULL_HANDLE, &image_index); switch (result) { case VK_SUCCESS: @@ -261,10 +261,10 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities) { requested_image_count = capabilities.maxImageCount; } else { requested_image_count = - std::max(requested_image_count, std::min(3U, capabilities.maxImageCount)); + (std::max)(requested_image_count, (std::min)(3U, capabilities.maxImageCount)); } } else { - requested_image_count = std::max(requested_image_count, 3U); + requested_image_count = (std::max)(requested_image_count, 3U); } VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index eda9ff2a5a..1e89652f50 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -509,16 +509,16 @@ TransformBufferCopies(std::span copies, size_t bu } } struct RangedBarrierRange { - u32 min_mip = std::numeric_limits::max(); - u32 max_mip = std::numeric_limits::min(); - u32 min_layer = std::numeric_limits::max(); - u32 max_layer = std::numeric_limits::min(); + u32 min_mip = (std::numeric_limits::max)(); + u32 max_mip = (std::numeric_limits::min)(); + u32 min_layer = (std::numeric_limits::max)(); + u32 max_layer = (std::numeric_limits::min)(); void AddLayers(const VkImageSubresourceLayers& layers) { - min_mip 
= std::min(min_mip, layers.mipLevel); - max_mip = std::max(max_mip, layers.mipLevel + 1); - min_layer = std::min(min_layer, layers.baseArrayLayer); - max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); + min_mip = (std::min)(min_mip, layers.mipLevel); + max_mip = (std::max)(max_mip, layers.mipLevel + 1); + min_layer = (std::min)(min_layer, layers.baseArrayLayer); + max_layer = (std::max)(max_layer, layers.baseArrayLayer + layers.layerCount); } VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { @@ -747,8 +747,8 @@ void BlitScale(Scheduler& scheduler, VkImage src_image, VkImage dst_image, const .z = 0, }, { - .x = std::max(1, src_size.x >> level), - .y = std::max(1, src_size.y >> level), + .x = (std::max)(1, src_size.x >> level), + .y = (std::max)(1, src_size.y >> level), .z = 1, }, }, @@ -765,8 +765,8 @@ void BlitScale(Scheduler& scheduler, VkImage src_image, VkImage dst_image, const .z = 0, }, { - .x = std::max(1, dst_size.x >> level), - .y = std::max(1, dst_size.y >> level), + .x = (std::max)(1, dst_size.x >> level), + .y = (std::max)(1, dst_size.y >> level), .z = 1, }, }, @@ -1956,8 +1956,8 @@ bool Image::BlitScaleHelper(bool scale_up) { .end = {static_cast(dst_width), static_cast(dst_height)}, }; const VkExtent2D extent{ - .width = std::max(scaled_width, info.size.width), - .height = std::max(scaled_height, info.size.height), + .width = (std::max)(scaled_width, info.size.width), + .height = (std::max)(scaled_height, info.size.height), }; auto* view_ptr = blit_view.get(); @@ -2310,21 +2310,21 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, is_rescaled = is_rescaled_; const auto& resolution = runtime.resolution; - u32 width = std::numeric_limits::max(); - u32 height = std::numeric_limits::max(); + u32 width = (std::numeric_limits::max)(); + u32 height = (std::numeric_limits::max)(); for (size_t index = 0; index < NUM_RT; ++index) { const ImageView* const color_buffer = 
color_buffers[index]; if (!color_buffer) { renderpass_key.color_formats[index] = PixelFormat::Invalid; continue; } - width = std::min(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width) + width = (std::min)(width, is_rescaled ? resolution.ScaleUp(color_buffer->size.width) : color_buffer->size.width); - height = std::min(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height) + height = (std::min)(height, is_rescaled ? resolution.ScaleUp(color_buffer->size.height) : color_buffer->size.height); attachments.push_back(color_buffer->RenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; - num_layers = std::max(num_layers, color_buffer->range.extent.layers); + num_layers = (std::max)(num_layers, color_buffer->range.extent.layers); images[num_images] = color_buffer->ImageHandle(); image_ranges[num_images] = MakeSubresourceRange(color_buffer); rt_map[index] = num_images; @@ -2333,13 +2333,13 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, } const size_t num_colors = attachments.size(); if (depth_buffer) { - width = std::min(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width) + width = (std::min)(width, is_rescaled ? resolution.ScaleUp(depth_buffer->size.width) : depth_buffer->size.width); - height = std::min(height, is_rescaled ? resolution.ScaleUp(depth_buffer->size.height) + height = (std::min)(height, is_rescaled ? 
resolution.ScaleUp(depth_buffer->size.height) : depth_buffer->size.height); attachments.push_back(depth_buffer->RenderTarget()); renderpass_key.depth_format = depth_buffer->format; - num_layers = std::max(num_layers, depth_buffer->range.extent.layers); + num_layers = (std::max)(num_layers, depth_buffer->range.extent.layers); images[num_images] = depth_buffer->ImageHandle(); const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer); image_ranges[num_images] = subresource_range; @@ -2353,8 +2353,8 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, renderpass_key.samples = samples; renderpass = runtime.render_pass_cache.Get(renderpass_key); - render_area.width = std::min(render_area.width, width); - render_area.height = std::min(render_area.height, height); + render_area.width = (std::min)(render_area.width, width); + render_area.height = (std::min)(render_area.height, height); num_color_buffers = static_cast(num_colors); framebuffer = runtime.device.GetLogical().CreateFramebuffer({ @@ -2366,7 +2366,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, .pAttachments = attachments.data(), .width = render_area.width, .height = render_area.height, - .layers = static_cast(std::max(num_layers, 1)), + .layers = static_cast((std::max)(num_layers, 1)), }); } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index baeb8b23a0..573694a145 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -139,8 +139,8 @@ std::array GenericEnvironment::WorkgroupSize() const { } u64 GenericEnvironment::ReadInstruction(u32 address) { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); + read_lowest = (std::min)(read_lowest, address); + read_highest = (std::max)(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[(address - cached_lowest) / INST_SIZE]; @@ -319,7 
+319,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, break; } const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); + ASSERT(local_size <= (std::numeric_limits::max)()); local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; is_proprietary_driver = texture_bound == 2; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 6b372e3365..2d781d82f7 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -86,10 +86,10 @@ protected: u32 shared_memory_size{}; std::array workgroup_size{}; - u32 read_lowest = std::numeric_limits::max(); + u32 read_lowest = (std::numeric_limits::max)(); u32 read_highest = 0; - u32 cached_lowest = std::numeric_limits::max(); + u32 cached_lowest = (std::numeric_limits::max)(); u32 cached_highest = 0; u32 initial_offset = 0; diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index a018c6df46..5279ff2a0a 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -67,8 +67,8 @@ void DecompressBlocks(std::span input, std::span output, BufferIma const u32 width = copy.image_extent.width; const u32 height = copy.image_extent.height * copy.image_subresource.num_layers; const u32 depth = copy.image_extent.depth; - const u32 block_width = std::min(width, BLOCK_SIZE); - const u32 block_height = std::min(height, BLOCK_SIZE); + const u32 block_width = (std::min)(width, BLOCK_SIZE); + const u32 block_height = (std::min)(height, BLOCK_SIZE); const u32 pitch = width * out_bpp; size_t input_offset = 0; size_t output_offset = 0; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index d79594ce55..01413f0c9d 100644 --- 
a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -185,7 +185,7 @@ bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; const s32 lhs_mips = lhs.info.resources.levels; const s32 rhs_mips = rhs.info.resources.levels; - const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); + const s32 num_mips = (std::min)(lhs_mips - base->level, rhs_mips); AliasedImage lhs_alias; AliasedImage rhs_alias; lhs_alias.id = rhs_id; @@ -204,9 +204,9 @@ bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i rhs_size.height = Common::DivCeil(rhs_size.height, rhs_block.height); } const Extent3D copy_size{ - .width = std::min(lhs_size.width, rhs_size.width), - .height = std::min(lhs_size.height, rhs_size.height), - .depth = std::min(lhs_size.depth, rhs_size.depth), + .width = (std::min)(lhs_size.width, rhs_size.width), + .height = (std::min)(lhs_size.height, rhs_size.height), + .depth = (std::min)(lhs_size.depth, rhs_size.depth), }; if (copy_size.width == 0 || copy_size.height == 0) { LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); @@ -218,7 +218,7 @@ bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; const s32 rhs_layers = is_rhs_3d ? 
1 : rhs.info.resources.layers; - const s32 num_layers = std::min(lhs_layers, rhs_layers); + const s32 num_layers = (std::min)(lhs_layers, rhs_layers); const SubresourceLayers lhs_subresource{ .base_level = mip_level, .base_layer = 0, diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index 18b9250f91..b7e4049f35 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -18,9 +18,9 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i ImageId image_id_, GPUVAddr addr) : image_id{image_id_}, gpu_addr{addr}, format{info.format}, type{info.type}, range{info.range}, size{ - .width = std::max(image_info.size.width >> range.base.level, 1u), - .height = std::max(image_info.size.height >> range.base.level, 1u), - .depth = std::max(image_info.size.depth >> range.base.level, 1u), + .width = (std::max)(image_info.size.width >> range.base.level, 1u), + .height = (std::max)(image_info.size.height >> range.base.level, 1u), + .depth = (std::max)(image_info.size.depth >> range.base.level, 1u), } { ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true), "Image view format {} is incompatible with image format {}", info.format, diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index 0766a3b79a..8dac8383e0 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -19,7 +19,7 @@ namespace { using Tegra::Texture::TextureType; -constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits::max(); +constexpr u8 RENDER_TARGET_SWIZZLE = (std::numeric_limits::max)(); [[nodiscard]] u8 CastSwizzle(SwizzleSource source) { const u8 casted = static_cast(source); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 
6c733fe902..e5d559b591 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -56,14 +56,14 @@ TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag const s64 device_local_memory = static_cast(runtime.GetDeviceLocalMemory()); const s64 min_spacing_expected = device_local_memory - 1_GiB; const s64 min_spacing_critical = device_local_memory - 512_MiB; - const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); + const s64 mem_threshold = (std::min)(device_local_memory, TARGET_THRESHOLD); const s64 min_vacancy_expected = (6 * mem_threshold) / 10; const s64 min_vacancy_critical = (2 * mem_threshold) / 10; expected_memory = static_cast( - std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), + (std::max)((std::min)(device_local_memory - min_vacancy_expected, min_spacing_expected), DEFAULT_EXPECTED_MEMORY)); critical_memory = static_cast( - std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), + (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); } else { @@ -586,8 +586,8 @@ std::optional TextureCache

::GetFlushArea(D area->end_address = cpu_addr + size; area->preemtive = true; } - area->start_address = std::min(area->start_address, image.cpu_addr); - area->end_address = std::max(area->end_address, image.cpu_addr_end); + area->start_address = (std::min)(area->start_address, image.cpu_addr); + area->end_address = (std::max)(area->end_address, image.cpu_addr_end); for (auto image_view_id : image.image_view_ids) { auto& image_view = slot_image_views[image_view_id]; image_view.flags |= ImageViewFlagBits::PreemtiveDownload; @@ -1273,7 +1273,7 @@ u64 TextureCache

::GetScaledImageSizeBytes(const ImageBase& image) { const u64 down_shift = static_cast(Settings::values.resolution_info.down_shift + Settings::values.resolution_info.down_shift); const u64 image_size_bytes = - static_cast(std::max(image.guest_size_bytes, image.unswizzled_size_bytes)); + static_cast((std::max)(image.guest_size_bytes, image.unswizzled_size_bytes)); const u64 tentative_size = (image_size_bytes * scale_up) >> down_shift; const u64 fitted_size = Common::AlignUp(tentative_size, 1024); return fitted_size; @@ -1994,7 +1994,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + u64 tentative_size = (std::max)(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::Converted)) { @@ -2168,7 +2168,7 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if (image.HasScaled()) { total_used_memory -= GetScaledImageSizeBytes(image); } - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + u64 tentative_size = (std::max)(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::Converted)) { @@ -2302,7 +2302,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + most_recent_tick = (std::max)(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); any_modified |= True(aliased_image.flags & ImageFlagBits::GpuModified); @@ -2443,9 +2443,9 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector::max()}; + static constexpr size_t UNSET_CHANNEL{(std::numeric_limits::max)()}; static constexpr s64 TARGET_THRESHOLD = 4_GiB; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e3faa5bf95..ede451b166 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -327,8 +327,8 @@ template } const SubresourceExtent resources = new_info.resources; return SubresourceExtent{ - .levels = std::max(resources.levels, info.resources.levels), - .layers = std::max(resources.layers, info.resources.layers), + .levels = (std::max)(resources.levels, info.resources.levels), + .layers = (std::max)(resources.layers, info.resources.layers), }; } @@ -354,7 +354,7 @@ template return std::nullopt; } return SubresourceExtent{ - .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), + .levels = (std::max)(new_info.resources.levels, info.resources.levels + base.level), .layers = 1, }; } @@ -388,8 +388,8 @@ template return std::nullopt; } return SubresourceExtent{ - .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), - .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), + .levels = (std::max)(new_info.resources.levels, info.resources.levels + base.level), + .layers = (std::max)(new_info.resources.layers, info.resources.layers + base.layer), }; } @@ -439,14 +439,14 @@ template } layers = 1; } else { - layers = std::max(resources.layers, info.resources.layers + base->layer); + layers = (std::max)(resources.layers, info.resources.layers + base->layer); } return OverlapResult{ .gpu_addr = overlap.gpu_addr, .cpu_addr = overlap.cpu_addr, .resources = { - .levels = std::max(resources.levels + base->level, info.resources.levels), + .levels = (std::max)(resources.levels + base->level, info.resources.levels), 
.layers = layers, }, }; diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index fef0be31d8..85fd06957e 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1291,7 +1291,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, case 1: { READ_UINT_VALUES(2) u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); - u32 L1 = std::min(L0 + (v[1] & 0x3F), 0xFFU); + u32 L1 = (std::min)(L0 + (v[1] & 0x3F), 0xFFU); ep1 = Pixel(0xFF, L0, L0, L0); ep2 = Pixel(0xFF, L1, L1, L1); } break; @@ -1522,7 +1522,7 @@ static void DecompressBlock(std::span inBuf, const u32 blockWidth, // Read color data... u32 colorDataBits = remainingBits; while (remainingBits > 0) { - u32 nb = std::min(remainingBits, 8); + u32 nb = (std::min)(remainingBits, 8); u32 b = strm.ReadBits(nb); colorEndpointStream.WriteBits(b, nb); remainingBits -= 8; @@ -1603,7 +1603,7 @@ static void DecompressBlock(std::span inBuf, const u32 blockWidth, texelWeightData[clearByteStart - 1] &= static_cast((1 << (weightParams.GetPackedBitSize() % 8)) - 1); std::memset(texelWeightData.data() + clearByteStart, 0, - std::min(16U - clearByteStart, 16U)); + (std::min)(16U - clearByteStart, 16U)); } IntegerEncodedVector texelWeightValues; @@ -1674,8 +1674,8 @@ void Decompress(std::span data, uint32_t width, uint32_t height, std::array uncompData; DecompressBlock(blockPtr, block_width, block_height, uncompData); - u32 decompWidth = std::min(block_width, width - x); - u32 decompHeight = std::min(block_height, height - y); + u32 decompWidth = (std::min)(block_width, width - x); + u32 decompHeight = (std::min)(block_height, height - y); const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); for (u32 h = 0; h < decompHeight; ++h) { diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 95bcdd37b2..12e4ddf165 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ 
-111,13 +111,13 @@ void SwizzleSubrectImpl(std::span output, std::span input, u32 wid const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; u32 unprocessed_lines = num_lines; - u32 extent_y = std::min(num_lines, height - origin_y); + u32 extent_y = (std::min)(num_lines, height - origin_y); for (u32 slice = 0; slice < depth; ++slice) { const u32 z = slice + origin_z; const u32 offset_z = (z >> block_depth) * slice_size + ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); - const u32 lines_in_y = std::min(unprocessed_lines, extent_y); + const u32 lines_in_y = (std::min)(unprocessed_lines, extent_y); for (u32 line = 0; line < lines_in_y; ++line) { const u32 y = line + origin_y; const u32 swizzled_y = pdep(y); @@ -180,7 +180,7 @@ void UnswizzleTexture(std::span output, std::span input, u32 bytes u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel; - const u32 new_bpp = std::min(4U, static_cast(std::countr_zero(width * bytes_per_pixel))); + const u32 new_bpp = (std::min)(4U, static_cast(std::countr_zero(width * bytes_per_pixel))); width = (width * bytes_per_pixel) >> new_bpp; bytes_per_pixel = 1U << new_bpp; Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, @@ -191,7 +191,7 @@ void SwizzleTexture(std::span output, std::span input, u32 bytes_p u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel; - const u32 new_bpp = std::min(4U, static_cast(std::countr_zero(width * bytes_per_pixel))); + const u32 new_bpp = (std::min)(4U, static_cast(std::countr_zero(width * bytes_per_pixel))); width = (width * bytes_per_pixel) >> new_bpp; bytes_per_pixel = 1U << new_bpp; Swizzle(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, diff --git 
a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 39c08b5ae1..2798d5839f 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -75,7 +75,7 @@ float TSCEntry::MaxAnisotropy() const noexcept { if (anisotropic_settings == Settings::AnisotropyMode::Automatic) { added_anisotropic = Settings::values.resolution_info.up_scale >> Settings::values.resolution_info.down_shift; - added_anisotropic = std::max(added_anisotropic - 1, 0); + added_anisotropic = (std::max)(added_anisotropic - 1, 0); } else { added_anisotropic = static_cast(Settings::values.max_anisotropy.GetValue()) - 1U; } diff --git a/src/video_core/textures/workers.cpp b/src/video_core/textures/workers.cpp index a71c305f49..01aa716e11 100644 --- a/src/video_core/textures/workers.cpp +++ b/src/video_core/textures/workers.cpp @@ -6,7 +6,7 @@ namespace Tegra::Texture { Common::ThreadWorker& GetThreadWorkers() { - static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, + static Common::ThreadWorker workers{(std::max)(std::thread::hardware_concurrency(), 2U) / 2, "ImageTranscode"}; return workers; diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 1f353d2df0..5dda1ffafc 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -104,8 +104,8 @@ std::pair, u32> MakeTransformF } } xfb[attribute] = varying; - count = std::max(count, attribute); - highest = std::max(highest, (base_offset + varying.components) * 4); + count = (std::max)(count, attribute); + highest = (std::max)(highest, (base_offset + varying.components) * 4); } UNIMPLEMENTED_IF(highest != layout.stride); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6fdf1e7874..6d7c33099b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -699,9 
+699,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "MVK driver breaks when using more than 16 vertex attributes/bindings"); properties.properties.limits.maxVertexInputAttributes = - std::min(properties.properties.limits.maxVertexInputAttributes, 16U); + (std::min)(properties.properties.limits.maxVertexInputAttributes, 16U); properties.properties.limits.maxVertexInputBindings = - std::min(properties.properties.limits.maxVertexInputBindings, 16U); + (std::min)(properties.properties.limits.maxVertexInputBindings, 16U); } if (is_turnip) { diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 4ab420afea..675dede61c 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -136,7 +136,7 @@ namespace Vulkan { if (vmaMapMemory(allocator, allocation, &mapped_ptr) != VK_SUCCESS) return {}; } const size_t n = static_cast(std::min(size, - std::numeric_limits::max())); + (std::numeric_limits::max)())); return std::span{static_cast(mapped_ptr), n}; } @@ -149,7 +149,7 @@ namespace Vulkan { const_cast(this)->mapped_ptr = p; } const size_t n = static_cast(std::min(size, - std::numeric_limits::max())); + (std::numeric_limits::max)())); return std::span{static_cast(mapped_ptr), n}; } diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 8fd0bff6af..6501094f05 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -860,7 +860,7 @@ public: /// Set object name. 
void SetObjectNameEXT(const char* name) const; - VkResult Wait(u64 timeout = std::numeric_limits::max()) const noexcept { + VkResult Wait(u64 timeout = (std::numeric_limits::max)()) const noexcept { return dld->vkWaitForFences(owner, 1, &handle, true, timeout); } @@ -961,7 +961,7 @@ public: * @param timeout Time in nanoseconds to timeout * @return True on successful wait, false on timeout */ - bool Wait(u64 value, u64 timeout = std::numeric_limits::max()) const { + bool Wait(u64 value, u64 timeout = (std::numeric_limits::max)()) const { const VkSemaphoreWaitInfo wait_info{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .pNext = nullptr, diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 38b7b0eec7..d663f6c282 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -256,7 +256,7 @@ if (YUZU_CRASH_DUMPS) target_compile_definitions(yuzu PRIVATE YUZU_CRASH_DUMPS) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") +if (CXX_CLANG) target_compile_definitions(yuzu PRIVATE $<$,15>:CANNOT_EXPLICITLY_INSTANTIATE> ) diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp index 5b6e32149d..c8edb90268 100644 --- a/src/yuzu/about_dialog.cpp +++ b/src/yuzu/about_dialog.cpp @@ -11,14 +11,15 @@ AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent) , ui{std::make_unique()} { - const auto description = std::string(Common::g_build_version); - const auto build_id = std::string(Common::g_build_id); + static const std::string description = std::string(Common::g_build_version); + static const std::string build_id = std::string(Common::g_build_id); + static const std::string compiler = std::string(Common::g_compiler_id); std::string yuzu_build; if (Common::g_is_dev_build) { - yuzu_build = fmt::format("Eden Nightly | {}-{}", description, build_id); + yuzu_build = fmt::format("Eden Nightly | {}-{} | {}", description, build_id, compiler); } else { - yuzu_build = fmt::format("Eden | {}", description); + yuzu_build = fmt::format("Eden | {} | 
{}", description, compiler); } const auto override_build = fmt::format(fmt::runtime( diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index b1ca497e32..63e7a74003 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -381,8 +381,8 @@ qreal GRenderWindow::windowPixelRatio() const { std::pair GRenderWindow::ScaleTouch(const QPointF& pos) const { const qreal pixel_ratio = windowPixelRatio(); - return {static_cast(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), - static_cast(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; + return {static_cast((std::max)(std::round(pos.x() * pixel_ratio), qreal{0.0})), + static_cast((std::max)(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; } void GRenderWindow::closeEvent(QCloseEvent* event) { diff --git a/src/yuzu/configuration/configure_touch_from_button.cpp b/src/yuzu/configuration/configure_touch_from_button.cpp index a6237ab72f..2a4ae3bc89 100644 --- a/src/yuzu/configuration/configure_touch_from_button.cpp +++ b/src/yuzu/configuration/configure_touch_from_button.cpp @@ -484,8 +484,8 @@ void TouchScreenPreview::resizeEvent(QResizeEvent* event) { return; } - const int target_width = std::min(width(), height() * 4 / 3); - const int target_height = std::min(height(), width() * 3 / 4); + const int target_width = (std::min)(width(), height() * 4 / 3); + const int target_height = (std::min)(height(), width() * 3 / 4); if (target_width == width() && target_height == height()) { return; } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 80dd90d876..1ecef4af92 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -490,7 +490,7 @@ void GameList::DonePopulating(const QStringList& watch_list) { // Also artificially caps the watcher to a certain number of directories constexpr int LIMIT_WATCH_DIRECTORIES = 5000; constexpr int SLICE_SIZE = 25; - int len = std::min(static_cast(watch_list.size()), LIMIT_WATCH_DIRECTORIES); + int len = 
(std::min)(static_cast(watch_list.size()), LIMIT_WATCH_DIRECTORIES); for (int i = 0; i < len; i += SLICE_SIZE) { watcher->addPaths(watch_list.mid(i, i + SLICE_SIZE)); QCoreApplication::processEvents(); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4604a7b904..c6e004813c 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -386,6 +386,7 @@ static void OverrideWindowsFont() { } #endif +#ifndef _WIN32 inline static bool isDarkMode() { #if QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) const auto scheme = QGuiApplication::styleHints()->colorScheme(); @@ -397,6 +398,7 @@ inline static bool isDarkMode() { return text.lightness() > window.lightness(); #endif // QT_VERSION } +#endif // _WIN32 GMainWindow::GMainWindow(bool has_broken_vulkan) : ui{std::make_unique()}, system{std::make_unique()}, @@ -2473,7 +2475,7 @@ void GMainWindow::StoreRecentFile(const QString& filename) { void GMainWindow::UpdateRecentFiles() { const int num_recent_files = - std::min(static_cast(UISettings::values.recent_files.size()), max_recent_files_item); + (std::min)(static_cast(UISettings::values.recent_files.size()), max_recent_files_item); for (int i = 0; i < num_recent_files; i++) { const QString text = QStringLiteral("&%1. 
%2").arg(i + 1).arg( @@ -2652,7 +2654,7 @@ static bool RomFSRawCopy(size_t total_size, size_t& read_size, QProgressDialog& if ((new_timestamp - last_timestamp) > 33ms) { last_timestamp = new_timestamp; dialog.setValue( - static_cast(std::min(read_size, total_size) * 100 / total_size)); + static_cast((std::min)(read_size, total_size) * 100 / total_size)); QCoreApplication::processEvents(); } @@ -4115,7 +4117,7 @@ void GMainWindow::OnDecreaseVolume() { if (current_volume <= 6) { step = 1; } - Settings::values.volume.SetValue(std::max(current_volume - step, 0)); + Settings::values.volume.SetValue((std::max)(current_volume - step, 0)); UpdateVolumeUI(); } @@ -5020,14 +5022,15 @@ void GMainWindow::OnEmulatorUpdateAvailable() { void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version, std::string_view gpu_vendor) { - const auto description = std::string(Common::g_build_version); - const auto build_id = std::string(Common::g_build_id); + static const std::string description = std::string(Common::g_build_version); + static const std::string build_id = std::string(Common::g_build_id); + static const std::string compiler = std::string(Common::g_compiler_id); std::string yuzu_title; if (Common::g_is_dev_build) { - yuzu_title = fmt::format("Eden Nightly | {}-{}", description, build_id); + yuzu_title = fmt::format("Eden Nightly | {}-{} | {}", description, build_id, compiler); } else { - yuzu_title = fmt::format("Eden | {}", description); + yuzu_title = fmt::format("Eden | {} | {}", description, compiler); } const auto override_title = @@ -5674,13 +5677,13 @@ void VolumeButton::wheelEvent(QWheelEvent* event) { if (num_steps > 0) { Settings::values.volume.SetValue( - std::min(200, Settings::values.volume.GetValue() + num_steps)); + (std::min)(200, Settings::values.volume.GetValue() + num_steps)); } else { Settings::values.volume.SetValue( - std::max(0, Settings::values.volume.GetValue() + num_steps)); + (std::max)(0, 
Settings::values.volume.GetValue() + num_steps)); } - scroll_multiplier = std::min(MaxMultiplier, scroll_multiplier * 2); + scroll_multiplier = (std::min)(MaxMultiplier, scroll_multiplier * 2); scroll_timer.start(100); // reset the multiplier if no scroll event occurs within 100 ms emit VolumeChanged(); @@ -5721,11 +5724,11 @@ static void SetHighDPIAttributes() { constexpr float minimum_width = 1350.0f; constexpr float minimum_height = 900.0f; - const float width_ratio = std::max(1.0f, real_width / minimum_width); - const float height_ratio = std::max(1.0f, real_height / minimum_height); + const float width_ratio = (std::max)(1.0f, real_width / minimum_width); + const float height_ratio = (std::max)(1.0f, real_height / minimum_height); // Get the lower of the 2 ratios and truncate, this is the maximum integer scale. - const float max_ratio = std::trunc(std::min(width_ratio, height_ratio)); + const float max_ratio = std::trunc((std::min)(width_ratio, height_ratio)); if (max_ratio > real_ratio) { QApplication::setHighDpiScaleFactorRoundingPolicy( diff --git a/src/yuzu/play_time_manager.cpp b/src/yuzu/play_time_manager.cpp index 2669e3a7ab..8317386816 100644 --- a/src/yuzu/play_time_manager.cpp +++ b/src/yuzu/play_time_manager.cpp @@ -168,7 +168,7 @@ QString ReadablePlayTime(qulonglong time_seconds) { if (time_seconds == 0) { return {}; } - const auto time_minutes = std::max(static_cast(time_seconds) / 60, 1.0); + const auto time_minutes = (std::max)(static_cast(time_seconds) / 60, 1.0); const auto time_hours = static_cast(time_seconds) / 3600; const bool is_minutes = time_minutes < 60; const char* unit = is_minutes ? 
"m" : "h"; diff --git a/src/yuzu/util/util.cpp b/src/yuzu/util/util.cpp index 551df7b4cd..844da5c401 100644 --- a/src/yuzu/util/util.cpp +++ b/src/yuzu/util/util.cpp @@ -30,7 +30,7 @@ QString ReadableByteSize(qulonglong size) { return QStringLiteral("0"); } - const int digit_groups = std::min(static_cast(std::log10(size) / std::log10(1024)), + const int digit_groups = (std::min)(static_cast(std::log10(size) / std::log10(1024)), static_cast(units.size())); return QStringLiteral("%L1 %2") .arg(size / std::pow(1024, digit_groups), 0, 'f', 1) diff --git a/tools/cpm-fetch.sh b/tools/cpm-fetch.sh index 648bbae1c8..5620996433 100755 --- a/tools/cpm-fetch.sh +++ b/tools/cpm-fetch.sh @@ -59,7 +59,7 @@ download_package() { if grep -e "patches" <<< "$JSON" > /dev/null; then PATCHES=$(jq -r '.patches | join(" ")' <<< "$JSON") for patch in $PATCHES; do - patch -p1 < "$ROOTDIR"/.patch/$package/$patch + patch --binary -p1 < "$ROOTDIR"/.patch/$package/$patch done fi @@ -118,6 +118,14 @@ do continue fi + VERSION=$(jq -r ".version" <<< "$JSON") + GIT_VERSION=$(jq -r ".git_version" <<< "$JSON") + TAG=$(jq -r ".tag" <<< "$JSON") + SHA=$(jq -r ".sha" <<< "$JSON") + + [ "$GIT_VERSION" == null ] && GIT_VERSION="$VERSION" + [ "$GIT_VERSION" == null ] && GIT_VERSION="$TAG" + # url parsing WOOOHOOHOHOOHOHOH URL=$(jq -r ".url" <<< "$JSON") REPO=$(jq -r ".repo" <<< "$JSON") @@ -173,7 +181,7 @@ do # key parsing KEY=$(jq -r ".key" <<< "$JSON") - if [ "$KEY" == null ]; then + if [ "$KEY" == null ]; then if [ "$SHA" != null ]; then KEY=$(cut -c1-4 - <<< "$SHA") elif [ "$GIT_VERSION" != null ]; then From 2502352180c6a96b2b544f3bbf41b5569af3441d Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 10 Sep 2025 02:22:07 +0200 Subject: [PATCH 24/47] [core, desktop] "fixes" from building on mxe/x86_64-w64-mingw32 (#396) * well, i couldn't build the executable, but in anyway those build errors can come back later to bite our backs * include missing include * safeguard _MSC_VER only headers * saw some 
of those changes on another PR but I cant find it at moment Signed-off-by: Caio Oliveira Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/396 Reviewed-by: crueter Co-authored-by: Caio Oliveira Co-committed-by: Caio Oliveira --- src/core/file_sys/vfs/vfs_real.cpp | 6 +++++- src/core/hle/service/nifm/nifm.cpp | 3 +++ src/core/hle/service/nvnflinger/hardware_composer.cpp | 2 ++ src/core/internal_network/emu_net_state.cpp | 2 ++ src/core/internal_network/wifi_scanner.cpp | 2 ++ src/yuzu/main.cpp | 2 ++ 6 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/file_sys/vfs/vfs_real.cpp b/src/core/file_sys/vfs/vfs_real.cpp index 052684e9db..4199667171 100644 --- a/src/core/file_sys/vfs/vfs_real.cpp +++ b/src/core/file_sys/vfs/vfs_real.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -442,11 +445,12 @@ std::vector RealVfsDirectory::GetFiles() const { FileTimeStampRaw RealVfsDirectory::GetFileTimeStamp(std::string_view path_) const { const auto full_path = FS::SanitizePath(path + '/' + std::string(path_)); const auto fs_path = std::filesystem::path{FS::ToU8String(full_path)}; - struct stat file_status; #ifdef _WIN32 + struct _stat64 file_status; const auto stat_result = _wstat64(fs_path.c_str(), &file_status); #else + struct stat file_status; const auto stat_result = stat(fs_path.c_str(), &file_status); #endif diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 15c7d8d2c7..7d43677c6f 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -32,8 +33,10 @@ #undef interface #include #pragma pop_macro("interface") +#ifdef _MSC_VER #pragma comment(lib, "wlanapi.lib") #endif +#endif namespace { diff --git 
a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp index 7098f4709d..a262a3dcd5 100644 --- a/src/core/hle/service/nvnflinger/hardware_composer.cpp +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -4,6 +4,8 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later +#include + #include #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" diff --git a/src/core/internal_network/emu_net_state.cpp b/src/core/internal_network/emu_net_state.cpp index 17fa58fa08..d6d1a70a60 100644 --- a/src/core/internal_network/emu_net_state.cpp +++ b/src/core/internal_network/emu_net_state.cpp @@ -10,8 +10,10 @@ #define NOMINMAX #include #include +#ifdef _MSC_VER #pragma comment(lib, "wlanapi.lib") #endif +#endif #include #include diff --git a/src/core/internal_network/wifi_scanner.cpp b/src/core/internal_network/wifi_scanner.cpp index f4b1738e69..127221099f 100644 --- a/src/core/internal_network/wifi_scanner.cpp +++ b/src/core/internal_network/wifi_scanner.cpp @@ -15,8 +15,10 @@ using namespace std::chrono_literals; #define NOMINMAX #include #include +#ifdef _MSC_VER #pragma comment(lib, "wlanapi.lib") #endif +#endif namespace Network { #ifdef ENABLE_WIFI_SCAN diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index c6e004813c..e23e9a6a48 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -178,7 +178,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include #include #include +#ifdef _MSC_VER #pragma comment(lib, "Dwmapi.lib") +#endif static inline void ApplyWindowsTitleBarDarkMode(HWND hwnd, bool enabled) { if (!hwnd) From 13ecc1e481cbb455ef89291ee9f2421386657dcb Mon Sep 17 00:00:00 2001 From: Marcin Serwin Date: Wed, 10 Sep 2025 18:36:42 +0200 Subject: [PATCH 25/47] [cmake] fix issues when using CPMUTIL_FORCE_SYSTEM (#399) Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/399 Reviewed-by: crueter 
Reviewed-by: MaranBr Co-authored-by: Marcin Serwin Co-committed-by: Marcin Serwin --- CMakeLists.txt | 7 +++++++ CMakeModules/Findsirit.cmake | 11 +++++++++++ externals/cpmfile.json | 1 + externals/nx_tzdb/CMakeLists.txt | 9 ++++++--- src/core/CMakeLists.txt | 2 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 4 ++++ 6 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 CMakeModules/Findsirit.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 144e77684e..6a9e15cfbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -204,6 +204,8 @@ CMAKE_DEPENDENT_OPTION(YUZU_USE_FASTER_LD "Check if a faster linker is available CMAKE_DEPENDENT_OPTION(USE_SYSTEM_MOLTENVK "Use the system MoltenVK lib (instead of the bundled one)" OFF "APPLE" OFF) +set(YUZU_TZDB_PATH "" CACHE STRING "Path to a pre-downloaded timezone database") + set(DEFAULT_ENABLE_OPENSSL ON) if (ANDROID OR WIN32 OR APPLE OR PLATFORM_SUN) # - Windows defaults to the Schannel backend. @@ -465,6 +467,7 @@ else() find_package(Opus 1.3 MODULE REQUIRED) find_package(ZLIB 1.2 REQUIRED) find_package(zstd 1.5 REQUIRED MODULE) + find_package(Boost 1.79.0 REQUIRED headers context system fiber) if (YUZU_TESTS) find_package(Catch2 3.0.1 REQUIRED) @@ -596,6 +599,10 @@ find_package(libusb) find_package(VulkanMemoryAllocator) find_package(SPIRV-Tools) +if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) + find_package(xbyak) +endif() + if (ENABLE_WEB_SERVICE) find_package(httplib) endif() diff --git a/CMakeModules/Findsirit.cmake b/CMakeModules/Findsirit.cmake new file mode 100644 index 0000000000..1611efaad8 --- /dev/null +++ b/CMakeModules/Findsirit.cmake @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2025 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +include(FindPackageHandleStandardArgs) + +find_package(PkgConfig QUIET) +pkg_search_module(sirit QUIET IMPORTED_TARGET sirit) +find_package_handle_standard_args(sirit + REQUIRED_VARS sirit_LINK_LIBRARIES + VERSION_VAR sirit_VERSION +) diff --git 
a/externals/cpmfile.json b/externals/cpmfile.json index 57258f771b..65f50ffdfc 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -15,6 +15,7 @@ "repo": "eden-emulator/sirit", "sha": "db1f1e8ab5", "hash": "73eb3a042848c63a10656545797e85f40d142009dfb7827384548a385e1e28e1ac72f42b25924ce530d58275f8638554281e884d72f9c7aaf4ed08690a414b05", + "find_args": "MODULE", "options": [ "SIRIT_USE_SYSTEM_SPIRV_HEADERS ON" ] diff --git a/externals/nx_tzdb/CMakeLists.txt b/externals/nx_tzdb/CMakeLists.txt index 242e1e1fcf..a08c80f2bd 100644 --- a/externals/nx_tzdb/CMakeLists.txt +++ b/externals/nx_tzdb/CMakeLists.txt @@ -33,9 +33,12 @@ if (CAN_BUILD_NX_TZDB AND NOT YUZU_DOWNLOAD_TIME_ZONE_DATA) set(NX_TZDB_TZ_DIR "${NX_TZDB_BASE_DIR}/zoneinfo") endif() -# TODO(crueter): This is a terrible solution, but MSVC fails to link without it -# Need to investigate further but I still can't reproduce... -if (MSVC) +if(NOT YUZU_TZDB_PATH STREQUAL "") + set(NX_TZDB_BASE_DIR "${YUZU_TZDB_PATH}") + set(NX_TZDB_TZ_DIR "${NX_TZDB_BASE_DIR}/zoneinfo") +elseif (MSVC) + # TODO(crueter): This is a terrible solution, but MSVC fails to link without it + # Need to investigate further but I still can't reproduce... 
set(NX_TZDB_VERSION "250725") set(NX_TZDB_ARCHIVE "${CPM_SOURCE_CACHE}/nx_tzdb/${NX_TZDB_VERSION}.zip") diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 0be60b55c6..1e8e4ec07a 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1200,7 +1200,7 @@ else() target_link_libraries(core PUBLIC Boost::headers) endif() -target_link_libraries(core PRIVATE fmt::fmt nlohmann_json::nlohmann_json RenderDoc::API mbedtls) +target_link_libraries(core PRIVATE fmt::fmt nlohmann_json::nlohmann_json RenderDoc::API mbedtls mbedcrypto) if (MINGW) target_link_libraries(core PRIVATE ${MSWSOCK_LIBRARY}) endif() diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index efae44d917..b74626bcd5 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -160,6 +160,10 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS) endif() if ("x86_64" IN_LIST ARCHITECTURE) + # Newer versions of xbyak (>= 7.25.0) have stricter checks that currently + # fail in dynarmic + target_compile_definitions(dynarmic PRIVATE XBYAK_STRICT_CHECK_MEM_REG_SIZE=0) + target_compile_definitions(dynarmic PRIVATE XBYAK_OLD_DISP_CHECK=1) target_link_libraries(dynarmic PRIVATE From 36b0c5860289e800f583752e8c8d97a1c7fb7c81 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:54:53 +0000 Subject: [PATCH 26/47] [dynarmic] reduce opt pass latency Signed-off-by: lizzie --- src/dynarmic/docs/RegisterAllocator.md | 40 +- .../docs/ReturnStackBufferOptimization.md | 162 +- src/dynarmic/src/dynarmic/CMakeLists.txt | 199 +-- .../backend/arm64/a32_address_space.cpp | 18 +- .../backend/arm64/a64_address_space.cpp | 19 +- .../backend/riscv64/a32_address_space.cpp | 16 +- .../dynarmic/backend/x64/a32_interface.cpp | 16 +- .../dynarmic/backend/x64/a64_interface.cpp | 18 +- .../src/dynarmic/common/memory_pool.cpp | 13 - .../src/dynarmic/common/memory_pool.h | 61 - src/dynarmic/src/dynarmic/ir/basic_block.cpp | 7 +- 
src/dynarmic/src/dynarmic/ir/basic_block.h | 3 - src/dynarmic/src/dynarmic/ir/ir_emitter.cpp | 21 - .../ir/opt/a32_constant_memory_reads_pass.cpp | 70 - .../ir/opt/a32_get_set_elimination_pass.cpp | 382 ----- .../ir/opt/a64_callback_config_pass.cpp | 57 - .../ir/opt/a64_get_set_elimination_pass.cpp | 165 -- .../ir/opt/a64_merge_interpret_blocks.cpp | 57 - .../ir/opt/constant_propagation_pass.cpp | 559 ------ .../ir/opt/dead_code_elimination_pass.cpp | 23 - .../dynarmic/ir/opt/identity_removal_pass.cpp | 44 - src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h | 127 -- .../src/dynarmic/ir/opt/naming_pass.cpp | 18 - src/dynarmic/src/dynarmic/ir/opt/passes.h | 47 - .../src/dynarmic/ir/opt/polyfill_pass.cpp | 218 --- .../src/dynarmic/ir/opt/verification_pass.cpp | 51 - src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 1502 +++++++++++++++++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 34 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 10 +- src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 2 +- src/dynarmic/tests/CMakeLists.txt | 146 +- src/dynarmic/tests/print_info.cpp | 2 +- 32 files changed, 1808 insertions(+), 2299 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.cpp delete mode 100644 src/dynarmic/src/dynarmic/common/memory_pool.h delete mode 100644 src/dynarmic/src/dynarmic/ir/ir_emitter.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp delete mode 
100644 src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/passes.h delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp delete mode 100644 src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.cpp create mode 100644 src/dynarmic/src/dynarmic/ir/opt_passes.h diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index fea6f19e6a..5a7210c791 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -16,19 +16,31 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun The member functions on `RegAlloc` are just a combination of the above concepts. +The following registers are reserved for internal use and should NOT participate in register allocation: +- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. +- `%rsp`: Stack pointer. + +The layout convenes `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate. + +Do NEVER modify `%r15`, we must make it clear that this register is "immutable" for the entirety of the JIT block duration. + ### `Scratch` - Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr) - Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm) +```c++ +Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr); +Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm); +``` At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope. 
### Pure `Use` - Xbyak::Reg64 UseGpr(Argument& arg); - Xbyak::Xmm UseXmm(Argument& arg); - OpArg UseOpArg(Argument& arg); - void Use(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseGpr(Argument& arg); +Xbyak::Xmm UseXmm(Argument& arg); +OpArg UseOpArg(Argument& arg); +void Use(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it @@ -39,9 +51,11 @@ This register **must not** have it's value changed. ### `UseScratch` - Xbyak::Reg64 UseScratchGpr(Argument& arg); - Xbyak::Xmm UseScratchXmm(Argument& arg); - void UseScratch(Argument& arg, HostLoc host_loc); +```c++ +Xbyak::Reg64 UseScratchGpr(Argument& arg); +Xbyak::Xmm UseScratchXmm(Argument& arg); +void UseScratch(Argument& arg, HostLoc host_loc); +``` At runtime, the value corresponding to `arg` will be placed a register. The actual register is determined by which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it @@ -55,7 +69,9 @@ You are free to modify the value in the register. The register is discarded at t A `Define` is the defintion of a value. This is the only time when a value may be set. - void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +```c++ +void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg); +``` By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the value to the specified register `reg`. @@ -64,7 +80,9 @@ value to the specified register `reg`. Adding a `Define` to an existing value. - void DefineValue(IR::Inst* inst, Argument& arg); +```c++ +void DefineValue(IR::Inst* inst, Argument& arg); +``` You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are emitted. 
diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md index 6ffe41bcc6..0e72c3bce8 100644 --- a/src/dynarmic/docs/ReturnStackBufferOptimization.md +++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md @@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block. - u64 LocationDescriptor::UniqueHash() const { - // This value MUST BE UNIQUE. - // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint - u64 pc_u64 = u64(arm_pc) << 32; - u64 fpscr_u64 = u64(fpscr.Value()); - u64 t_u64 = cpsr.T() ? 1 : 0; - u64 e_u64 = cpsr.E() ? 2 : 0; - return pc_u64 | fpscr_u64 | t_u64 | e_u64; - } +```c++ +u64 LocationDescriptor::UniqueHash() const { + // This value MUST BE UNIQUE. + // This calculation has to match up with EmitX64::EmitTerminalPopRSBHint + u64 pc_u64 = u64(arm_pc) << 32; + u64 fpscr_u64 = u64(fpscr.Value()); + u64 t_u64 = cpsr.T() ? 1 : 0; + u64 e_u64 = cpsr.E() ? 2 : 0; + return pc_u64 | fpscr_u64 | t_u64 | e_u64; +} +``` ## Our implementation isn't actually a stack @@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks. size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8 showed degraded performance. - struct JitState { - // ... +```c++ +struct JitState { + // ... - static constexpr size_t RSBSize = 8; // MUST be a power of 2. - u32 rsb_ptr = 0; - std::array rsb_location_descriptors; - std::array rsb_codeptrs; - void ResetRSB(); + static constexpr size_t RSBSize = 8; // MUST be a power of 2. + u32 rsb_ptr = 0; + std::array rsb_location_descriptors; + std::array rsb_codeptrs; + void ResetRSB(); - // ... - }; + // ... 
+}; +``` ### RSB Push We insert our prediction at the insertion point iff the RSB doesn't already contain a prediction with the same `UniqueHash`. - void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) { + using namespace Xbyak::util; - ASSERT(inst->GetArg(0).IsImmediate()); - u64 imm64 = inst->GetArg(0).GetU64(); + ASSERT(inst->GetArg(0).IsImmediate()); + u64 imm64 = inst->GetArg(0).GetU64(); - Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); - Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); - Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); - u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() - ? u64(unique_hash_to_code_ptr[imm64]) - : u64(code->GetReturnFromRunCodeAddress()); + Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX}); + Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(); + Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32(); + u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end() + ? u64(unique_hash_to_code_ptr[imm64]) + : u64(code->GetReturnFromRunCodeAddress()); - code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); - code->add(index_reg, 1); - code->and_(index_reg, u32(JitState::RSBSize - 1)); + code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]); + code->add(index_reg, 1); + code->and_(index_reg, u32(JitState::RSBSize - 1)); - code->mov(loc_desc_reg, u64(imm64)); - CodePtr patch_location = code->getCurr(); - patch_unique_hash_locations[imm64].emplace_back(patch_location); - code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. 
- code->EnsurePatchLocationSize(patch_location, 10); + code->mov(loc_desc_reg, u64(imm64)); + CodePtr patch_location = code->getCurr(); + patch_unique_hash_locations[imm64].emplace_back(patch_location); + code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch. + code->EnsurePatchLocationSize(patch_location, 10); - Xbyak::Label label; - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->je(label, code->T_SHORT); - } - - code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); - code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); - code->L(label); + Xbyak::Label label; + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->je(label, code->T_SHORT); } + code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg); + code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg); + code->L(label); +} +``` + In pseudocode: - for (i := 0 .. RSBSize-1) - if (rsb_location_descriptors[i] == imm64) - goto label; - rsb_ptr++; - rsb_ptr %= RSBSize; - rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash - rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; - label: +```c++ + for (i := 0 .. 
RSBSize-1) + if (rsb_location_descriptors[i] == imm64) + goto label; + rsb_ptr++; + rsb_ptr %= RSBSize; + rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash + rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */; +label: +``` ## RSB Pop To check if a predicition is in the RSB, we linearly scan the RSB. - void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { - using namespace Xbyak::util; +```c++ +void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) { + using namespace Xbyak::util; - // This calculation has to match up with IREmitter::PushRSB - code->mov(ecx, MJitStateReg(Arm::Reg::PC)); - code->shl(rcx, 32); - code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); - code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); - code->or_(rbx, rcx); + // This calculation has to match up with IREmitter::PushRSB + code->mov(ecx, MJitStateReg(Arm::Reg::PC)); + code->shl(rcx, 32); + code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]); + code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]); + code->or_(rbx, rcx); - code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); - for (size_t i = 0; i < JitState::RSBSize; ++i) { - code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); - code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); - } - - code->jmp(rax); + code->mov(rax, u64(code->GetReturnFromRunCodeAddress())); + for (size_t i = 0; i < JitState::RSBSize; ++i) { + code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]); + code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]); } + code->jmp(rax); +} +``` + In pseudocode: - rbx := ComputeUniqueHash() - rax := ReturnToDispatch - for (i := 0 .. 
RSBSize-1) - if (rbx == rsb_location_descriptors[i]) - rax = rsb_codeptrs[i] - goto rax \ No newline at end of file +```c++ +rbx := ComputeUniqueHash() +rax := ReturnToDispatch +for (i := 0 .. RSBSize-1) + if (rbx == rsb_location_descriptors[i]) + rax = rsb_codeptrs[i] +goto rax +``` diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index b74626bcd5..ee06ccf19a 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(dynarmic common/lut_from_list.h common/math_util.cpp common/math_util.h - common/memory_pool.cpp - common/memory_pool.h common/safe_ops.h common/spin_lock.h common/string_util.h @@ -78,7 +76,6 @@ add_library(dynarmic ir/basic_block.cpp ir/basic_block.h ir/cond.h - ir/ir_emitter.cpp ir/ir_emitter.h ir/location_descriptor.cpp ir/location_descriptor.h @@ -87,78 +84,59 @@ add_library(dynarmic ir/opcodes.cpp ir/opcodes.h ir/opcodes.inc - ir/opt/constant_propagation_pass.cpp - ir/opt/dead_code_elimination_pass.cpp - ir/opt/identity_removal_pass.cpp - ir/opt/ir_matcher.h - ir/opt/naming_pass.cpp - ir/opt/passes.h - ir/opt/polyfill_pass.cpp - ir/opt/verification_pass.cpp + ir/opt_passes.cpp + ir/opt_passes.h ir/terminal.h ir/type.cpp ir/type.h ir/value.cpp ir/value.h + # A32 + frontend/A32/a32_ir_emitter.cpp + frontend/A32/a32_ir_emitter.h + frontend/A32/a32_location_descriptor.cpp + frontend/A32/a32_location_descriptor.h + frontend/A32/decoder/arm.h + frontend/A32/decoder/arm.inc + frontend/A32/decoder/asimd.h + frontend/A32/decoder/asimd.inc + frontend/A32/decoder/thumb16.h + frontend/A32/decoder/thumb16.inc + frontend/A32/decoder/thumb32.h + frontend/A32/decoder/thumb32.inc + frontend/A32/decoder/vfp.h + frontend/A32/decoder/vfp.inc + frontend/A32/disassembler/disassembler.h + frontend/A32/disassembler/disassembler_arm.cpp + frontend/A32/disassembler/disassembler_thumb.cpp + frontend/A32/FPSCR.h + frontend/A32/ITState.h + 
frontend/A32/PSR.h + frontend/A32/translate/a32_translate.cpp + frontend/A32/translate/a32_translate.h + frontend/A32/translate/conditional_state.cpp + frontend/A32/translate/conditional_state.h + frontend/A32/translate/translate_arm.cpp + frontend/A32/translate/translate_thumb.cpp + interface/A32/a32.h + interface/A32/arch_version.h + interface/A32/config.h + interface/A32/coprocessor.h + interface/A32/coprocessor_util.h + interface/A32/disassembler.h + # A64 + frontend/A64/a64_ir_emitter.cpp + frontend/A64/a64_ir_emitter.h + frontend/A64/a64_location_descriptor.cpp + frontend/A64/a64_location_descriptor.h + frontend/A64/decoder/a64.h + frontend/A64/decoder/a64.inc + frontend/A64/translate/a64_translate.cpp + frontend/A64/translate/a64_translate.h + interface/A64/a64.h + interface/A64/config.h ) -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A32/a32_ir_emitter.cpp - frontend/A32/a32_ir_emitter.h - frontend/A32/a32_location_descriptor.cpp - frontend/A32/a32_location_descriptor.h - frontend/A32/decoder/arm.h - frontend/A32/decoder/arm.inc - frontend/A32/decoder/asimd.h - frontend/A32/decoder/asimd.inc - frontend/A32/decoder/thumb16.h - frontend/A32/decoder/thumb16.inc - frontend/A32/decoder/thumb32.h - frontend/A32/decoder/thumb32.inc - frontend/A32/decoder/vfp.h - frontend/A32/decoder/vfp.inc - frontend/A32/disassembler/disassembler.h - frontend/A32/disassembler/disassembler_arm.cpp - frontend/A32/disassembler/disassembler_thumb.cpp - frontend/A32/FPSCR.h - frontend/A32/ITState.h - frontend/A32/PSR.h - frontend/A32/translate/a32_translate.cpp - frontend/A32/translate/a32_translate.h - frontend/A32/translate/conditional_state.cpp - frontend/A32/translate/conditional_state.h - frontend/A32/translate/translate_arm.cpp - frontend/A32/translate/translate_thumb.cpp - interface/A32/a32.h - interface/A32/arch_version.h - interface/A32/config.h - interface/A32/coprocessor.h - interface/A32/coprocessor_util.h - 
interface/A32/disassembler.h - ir/opt/a32_constant_memory_reads_pass.cpp - ir/opt/a32_get_set_elimination_pass.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - frontend/A64/a64_ir_emitter.cpp - frontend/A64/a64_ir_emitter.h - frontend/A64/a64_location_descriptor.cpp - frontend/A64/a64_location_descriptor.h - frontend/A64/decoder/a64.h - frontend/A64/decoder/a64.inc - frontend/A64/translate/a64_translate.cpp - frontend/A64/translate/a64_translate.h - interface/A64/a64.h - interface/A64/config.h - ir/opt/a64_callback_config_pass.cpp - ir/opt/a64_get_set_elimination_pass.cpp - ir/opt/a64_merge_interpret_blocks.cpp - ) -endif() - if ("x86_64" IN_LIST ARCHITECTURE) # Newer versions of xbyak (>= 7.25.0) have stricter checks that currently # fail in dynarmic @@ -215,29 +193,21 @@ if ("x86_64" IN_LIST ARCHITECTURE) common/spin_lock_x64.h common/x64_disassemble.cpp common/x64_disassemble.h + # A32 + backend/x64/a32_emit_x64.cpp + backend/x64/a32_emit_x64.h + backend/x64/a32_emit_x64_memory.cpp + backend/x64/a32_interface.cpp + backend/x64/a32_jitstate.cpp + backend/x64/a32_jitstate.h + # A64 + backend/x64/a64_emit_x64.cpp + backend/x64/a64_emit_x64.h + backend/x64/a64_emit_x64_memory.cpp + backend/x64/a64_interface.cpp + backend/x64/a64_jitstate.cpp + backend/x64/a64_jitstate.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a32_emit_x64.cpp - backend/x64/a32_emit_x64.h - backend/x64/a32_emit_x64_memory.cpp - backend/x64/a32_interface.cpp - backend/x64/a32_jitstate.cpp - backend/x64/a32_jitstate.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "x86_64" - backend/x64/a64_emit_x64.cpp - backend/x64/a64_emit_x64.h - backend/x64/a64_emit_x64_memory.cpp - backend/x64/a64_interface.cpp - backend/x64/a64_jitstate.cpp - backend/x64/a64_jitstate.h - ) - endif() endif() if ("arm64" IN_LIST ARCHITECTURE) @@ -281,25 
+251,17 @@ if ("arm64" IN_LIST ARCHITECTURE) backend/arm64/verbose_debugging_output.h common/spin_lock_arm64.cpp common/spin_lock_arm64.h + # A32 + backend/arm64/a32_address_space.cpp + backend/arm64/a32_address_space.h + backend/arm64/a32_core.h + backend/arm64/a32_interface.cpp + # A64 + backend/arm64/a64_address_space.cpp + backend/arm64/a64_address_space.h + backend/arm64/a64_core.h + backend/arm64/a64_interface.cpp ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a32_address_space.cpp - backend/arm64/a32_address_space.h - backend/arm64/a32_core.h - backend/arm64/a32_interface.cpp - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_architecture_specific_sources(dynarmic "arm64" - backend/arm64/a64_address_space.cpp - backend/arm64/a64_address_space.h - backend/arm64/a64_core.h - backend/arm64/a64_interface.cpp - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -328,21 +290,14 @@ if ("riscv" IN_LIST ARCHITECTURE) backend/riscv64/reg_alloc.cpp backend/riscv64/reg_alloc.h backend/riscv64/stack_layout.h + # A32 + backend/riscv64/a32_address_space.cpp + backend/riscv64/a32_address_space.h + backend/riscv64/a32_core.h + backend/riscv64/a32_interface.cpp + backend/riscv64/code_block.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic PRIVATE - backend/riscv64/a32_address_space.cpp - backend/riscv64/a32_address_space.h - backend/riscv64/a32_core.h - backend/riscv64/a32_interface.cpp - backend/riscv64/code_block.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") - endif() + message(FATAL_ERROR "TODO: Unimplemented frontend for this host architecture") endif() if (WIN32) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index bbdf925ff4..8e48fa3687 100644 --- 
a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -16,7 +16,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::Arm64 { @@ -163,21 +163,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index d4fe9a9cb7..2b50ad9ea3 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/config.h" #include "dynarmic/interface/exclusive_monitor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace 
Dynarmic::Backend::Arm64 { @@ -331,22 +331,7 @@ IR::Block A64AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{descriptor}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp index efa211618b..0bb9591411 100644 --- a/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/riscv64/a32_address_space.cpp @@ -15,7 +15,7 @@ #include "dynarmic/backend/riscv64/stack_layout.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/translate/a32_translate.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::Backend::RV64 { @@ -28,19 +28,7 @@ A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf) IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const { IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, 
conf.hook_hint_instructions}); - - Optimization::PolyfillPass(ir_block, {}); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) { - Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - Optimization::VerificationPass(ir_block); - + Optimization::Optimize(ir_block, conf, {}); return ir_block; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp index b116ec180e..382eb70f3f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_interface.cpp @@ -29,7 +29,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/location_descriptor.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A32 { @@ -217,19 +217,7 @@ private: block_of_code.EnsureMemoryCommitted(MINIMUM_REMAINING_CODESIZE); IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A32GetSetElimination(ir_block, {.convert_nz_to_nzc = true}); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - 
Optimization::IdentityRemovalPass(ir_block); - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index ddd2327395..c65b582982 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -25,7 +25,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/interface/A64/a64.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" namespace Dynarmic::A64 { @@ -275,21 +275,7 @@ private: const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); }; IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour, conf.wall_clock_cntpct}); - Optimization::PolyfillPass(ir_block, polyfill_options); - Optimization::A64CallbackConfigPass(ir_block, conf); - Optimization::NamingPass(ir_block); - if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) { - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::ConstProp)) { - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - } - if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) { - Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks); - } - Optimization::VerificationPass(ir_block); + Optimization::Optimize(ir_block, conf, polyfill_options); return emitter.Emit(ir_block).entrypoint; } diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.cpp b/src/dynarmic/src/dynarmic/common/memory_pool.cpp deleted file mode 100644 index f41dd92af5..0000000000 --- 
a/src/dynarmic/src/dynarmic/common/memory_pool.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/common/memory_pool.h" - -#include - -namespace Dynarmic::Common { - - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/common/memory_pool.h b/src/dynarmic/src/dynarmic/common/memory_pool.h deleted file mode 100644 index c99316e107..0000000000 --- a/src/dynarmic/src/dynarmic/common/memory_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -namespace Dynarmic::Common { - -/// @tparam object_size Byte-size of objects to construct -/// @tparam slab_size Number of objects to have per slab -template -class Pool { -public: - inline Pool() noexcept { - AllocateNewSlab(); - } - inline ~Pool() noexcept { - std::free(current_slab); - for (char* slab : slabs) { - std::free(slab); - } - } - - Pool(const Pool&) = delete; - Pool(Pool&&) = delete; - - Pool& operator=(const Pool&) = delete; - Pool& operator=(Pool&&) = delete; - - /// @brief Returns a pointer to an `object_size`-bytes block of memory. - [[nodiscard]] void* Alloc() noexcept { - if (remaining == 0) { - slabs.push_back(current_slab); - AllocateNewSlab(); - } - void* ret = static_cast(current_ptr); - current_ptr += object_size; - remaining--; - return ret; - } -private: - /// @brief Allocates a completely new memory slab. - /// Used when an entirely new slab is needed - /// due the current one running out of usable space. 
- void AllocateNewSlab() noexcept { - current_slab = static_cast(std::malloc(object_size * slab_size)); - current_ptr = current_slab; - remaining = slab_size; - } - - std::vector slabs; - char* current_slab = nullptr; - char* current_ptr = nullptr; - size_t remaining = 0; -}; - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.cpp b/src/dynarmic/src/dynarmic/ir/basic_block.cpp index b00ab3cb20..a13a7ebfc9 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.cpp +++ b/src/dynarmic/src/dynarmic/ir/basic_block.cpp @@ -15,8 +15,6 @@ #include #include "dynarmic/common/assert.h" - -#include "dynarmic/common/memory_pool.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/frontend/A64/a64_types.h" #include "dynarmic/ir/cond.h" @@ -27,8 +25,7 @@ namespace Dynarmic::IR { Block::Block(const LocationDescriptor& location) : location{location}, end_location{location}, - cond{Cond::AL}, - instruction_alloc_pool{std::make_unique>()} + cond{Cond::AL} { } @@ -40,7 +37,7 @@ Block::Block(const LocationDescriptor& location) /// @param args A sequence of Value instances used as arguments for the instruction. /// @returns Iterator to the newly created instruction. 
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list args) noexcept { - IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode); + IR::Inst* inst = new IR::Inst(opcode); DEBUG_ASSERT(args.size() == inst->NumArgs()); std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable { inst->SetArg(index, arg); diff --git a/src/dynarmic/src/dynarmic/ir/basic_block.h b/src/dynarmic/src/dynarmic/ir/basic_block.h index e88dc92fc4..2f2d9ab6de 100644 --- a/src/dynarmic/src/dynarmic/ir/basic_block.h +++ b/src/dynarmic/src/dynarmic/ir/basic_block.h @@ -21,7 +21,6 @@ #include "dynarmic/ir/terminal.h" #include "dynarmic/ir/value.h" #include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -174,8 +173,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp b/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp deleted file mode 100644 index a8ef7e2989..0000000000 --- a/src/dynarmic/src/dynarmic/ir/ir_emitter.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/ir_emitter.h" - -#include - -#include "dynarmic/common/assert.h" -#include - -#include "dynarmic/ir/opcodes.h" - -namespace Dynarmic::IR { - - -} // namespace Dynarmic::IR diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp deleted file mode 100644 index 9699f18345..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_constant_memory_reads_pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/interface/A32/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { - for (auto& inst : block) { - switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) 
{ - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); - } - break; - } - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp deleted file mode 100644 index 499b38b120..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp +++ /dev/null @@ -1,382 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A32/a32_ir_emitter.h" -#include "dynarmic/frontend/A32/a32_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -namespace { - -void FlagsPass(IR::Block& block) { - using Iterator = std::reverse_iterator; - - struct FlagInfo { - bool set_not_required = false; - bool has_value_request = false; - Iterator value_request = {}; - }; - struct ValuelessFlagInfo { - bool set_not_required = false; - }; - ValuelessFlagInfo nzcvq; - ValuelessFlagInfo nzcv; - ValuelessFlagInfo nz; - FlagInfo c_flag; - FlagInfo ge; - - auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(value); - } - info.has_value_request = false; - - if (info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { - if 
(info.set_not_required) { - inst->Invalidate(); - } - info.set_not_required = true; - }; - - auto do_get = [](FlagInfo& info, Iterator inst) { - if (info.has_value_request) { - info.value_request->ReplaceUsesWith(IR::Value{&*inst}); - } - info.has_value_request = true; - info.value_request = inst; - }; - - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetCFlag: { - do_get(c_flag, inst); - break; - } - case IR::Opcode::A32SetCpsrNZCV: { - if (c_flag.has_value_request) { - ir.SetInsertionPointBefore(inst.base()); // base is one ahead - IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); - c_flag.value_request->ReplaceUsesWith(c); - c_flag.has_value_request = false; - break; // This case will be executed again because of the above - } - - do_set_valueless(nzcv, inst); - - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVRaw: { - if (c_flag.has_value_request) { - nzcv.set_not_required = false; - } - - do_set_valueless(nzcv, inst); - - nzcvq = {}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZCVQ: { - if (c_flag.has_value_request) { - nzcvq.set_not_required = false; - } - - do_set_valueless(nzcvq, inst); - - nzcv = {.set_not_required = true}; - nz = {.set_not_required = true}; - c_flag = {.set_not_required = true}; - break; - } - case IR::Opcode::A32SetCpsrNZ: { - do_set_valueless(nz, inst); - - nzcvq = {}; - nzcv = {}; - break; - } - case IR::Opcode::A32SetCpsrNZC: { - if (c_flag.has_value_request) { - c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); - c_flag.has_value_request = false; - } - - if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { - const auto nz_value = 
inst->GetArg(0); - - inst->Invalidate(); - - ir.SetInsertionPointBefore(inst.base()); - ir.SetCpsrNZ(IR::NZCV{nz_value}); - - nzcvq = {}; - nzcv = {}; - nz = {.set_not_required = true}; - break; - } - - if (nz.set_not_required && c_flag.set_not_required) { - inst->Invalidate(); - } else if (nz.set_not_required) { - inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); - } - nz.set_not_required = true; - c_flag.set_not_required = true; - - nzcv = {}; - nzcvq = {}; - break; - } - case IR::Opcode::A32SetGEFlags: { - do_set(ge, inst->GetArg(0), inst); - break; - } - case IR::Opcode::A32GetGEFlags: { - do_get(ge, inst); - break; - } - case IR::Opcode::A32SetGEFlagsCompressed: { - ge = {.set_not_required = true}; - break; - } - case IR::Opcode::A32OrQFlag: { - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcvq = {}; - nzcv = {}; - nz = {}; - c_flag = {}; - ge = {}; - } - break; - } - } - } -} - -void RegisterPass(IR::Block& block) { - using Iterator = IR::Block::iterator; - - struct RegInfo { - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array reg_info; - - const auto do_get = [](RegInfo& info, Iterator get_inst) { - if (info.register_value.IsEmpty()) { - info.register_value = IR::Value(&*get_inst); - return; - } - get_inst->ReplaceUsesWith(info.register_value); - }; - - const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { - if (info.last_set_instruction) { - (*info.last_set_instruction)->Invalidate(); - } - info = { - .register_value = value, - .last_set_instruction = set_inst, - }; - }; - - enum class ExtValueType { - Empty, - Single, - Double, - VectorDouble, - VectorQuad, - }; - struct ExtRegInfo { - ExtValueType value_type = {}; - IR::Value register_value; - std::optional last_set_instruction; - }; - std::array ext_reg_info; - - const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { - if (!std::all_of(infos.begin(), infos.end(), 
[type](const auto& info) { return info.get().value_type == type; })) { - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = IR::Value(&*get_inst), - .last_set_instruction = std::nullopt, - }; - } - return; - } - get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); - }; - - const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { - if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { - if (std::data(infos)[0].get().last_set_instruction) { - (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); - } - } - for (auto& info : infos) { - info.get() = { - .value_type = type, - .register_value = value, - .last_set_instruction = set_inst, - }; - } - }; - - // Location and version don't matter here. - A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A32GetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - ASSERT(reg != A32::Reg::PC); - const size_t reg_index = static_cast(reg); - do_get(reg_info[reg_index], inst); - break; - } - case IR::Opcode::A32SetRegister: { - const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - if (reg == A32::Reg::PC) { - break; - } - const auto reg_index = static_cast(reg); - do_set(reg_info[reg_index], inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); - break; - } - case IR::Opcode::A32SetExtendedRegister32: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, 
inst->GetArg(1), inst); - break; - } - case IR::Opcode::A32GetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_get(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - break; - } - case IR::Opcode::A32SetExtendedRegister64: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - do_ext_set(ExtValueType::Double, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst->GetArg(1), - inst); - break; - } - case IR::Opcode::A32GetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - do_ext_get(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_get(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst); - } - break; - } - case IR::Opcode::A32SetVector: { - const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); - const size_t reg_index = A32::RegNumber(reg); - if (A32::IsDoubleExtReg(reg)) { - ir.SetInsertionPointAfter(inst); - const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); - do_ext_set(ExtValueType::VectorDouble, - { - ext_reg_info[reg_index * 2 + 0], - ext_reg_info[reg_index * 2 + 1], - }, - stored_value, - inst); - } else { - DEBUG_ASSERT(A32::IsQuadExtReg(reg)); - do_ext_set(ExtValueType::VectorQuad, - { - ext_reg_info[reg_index * 4 + 0], - ext_reg_info[reg_index * 4 + 1], - ext_reg_info[reg_index * 4 + 2], - ext_reg_info[reg_index * 4 + 3], - }, - inst->GetArg(1), - inst); - } - break; - } - default: { - if (ReadsFromCoreRegister(opcode) || 
WritesToCoreRegister(opcode)) { - reg_info = {}; - ext_reg_info = {}; - } - break; - } - } - } -} - -} // namespace - -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { - FlagsPass(block); - RegisterPass(block); -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp deleted file mode 100644 index 79d9769520..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/frontend/A64/a64_ir_emitter.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { - if (conf.hook_data_cache_operations) { - return; - } - - for (auto& inst : block) { - if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { - continue; - } - - const auto op = static_cast(inst.GetArg(1).GetU64()); - if (op == A64::DataCacheOperation::ZeroByVA) { - A64::IREmitter ir{block}; - ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; - ir.SetInsertionPointBefore(&inst); - - size_t bytes = 4 << static_cast(conf.dczid_el0 & 0b1111); - IR::U64 addr{inst.GetArg(2)}; - - const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); - while (bytes >= 16) { - ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(16)); - bytes -= 16; - } - - while (bytes >= 8) { - ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(8)); - bytes -= 8; - } - - while (bytes >= 4) { - ir.WriteMemory32(addr, ir.Imm32(0), 
IR::AccType::DCZVA); - addr = ir.Add(addr, ir.Imm64(4)); - bytes -= 4; - } - } - inst.Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp deleted file mode 100644 index 53e3b27176..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_get_set_elimination_pass.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_types.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization { - -void A64GetSetElimination(IR::Block& block) { - using Iterator = IR::Block::iterator; - - enum class TrackingType { - W, - X, - S, - D, - Q, - SP, - NZCV, - NZCVRaw, - }; - struct RegisterInfo { - IR::Value register_value; - TrackingType tracking_type; - bool set_instruction_present = false; - Iterator last_set_instruction; - }; - std::array reg_info; - std::array vec_info; - RegisterInfo sp_info; - RegisterInfo nzcv_info; - - const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { - if (info.set_instruction_present) { - info.last_set_instruction->Invalidate(); - block.Instructions().erase(info.last_set_instruction); - } - - info.register_value = value; - info.tracking_type = tracking_type; - info.set_instruction_present = true; - info.last_set_instruction = set_inst; - }; - - const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { - const auto do_nothing = [&] { - info = {}; - info.register_value = 
IR::Value(&*get_inst); - info.tracking_type = tracking_type; - }; - - if (info.register_value.IsEmpty()) { - do_nothing(); - return; - } - - if (info.tracking_type == tracking_type) { - get_inst->ReplaceUsesWith(info.register_value); - return; - } - - do_nothing(); - }; - - for (auto inst = block.begin(); inst != block.end(); ++inst) { - auto const opcode = inst->GetOpcode(); - switch (opcode) { - case IR::Opcode::A64GetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::W); - break; - } - case IR::Opcode::A64GetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_get(reg_info.at(index), inst, TrackingType::X); - break; - } - case IR::Opcode::A64GetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::S); - break; - } - case IR::Opcode::A64GetD: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::D); - break; - } - case IR::Opcode::A64GetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_get(vec_info.at(index), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64GetSP: { - do_get(sp_info, inst, TrackingType::SP); - break; - } - case IR::Opcode::A64GetNZCVRaw: { - do_get(nzcv_info, inst, TrackingType::NZCVRaw); - break; - } - case IR::Opcode::A64SetW: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); - break; - } - case IR::Opcode::A64SetX: { - const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); - do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); - break; - } - case IR::Opcode::A64SetS: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); - break; - } - case IR::Opcode::A64SetD: 
{ - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); - break; - } - case IR::Opcode::A64SetQ: { - const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); - do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); - break; - } - case IR::Opcode::A64SetSP: { - do_set(sp_info, inst->GetArg(0), inst, TrackingType::SP); - break; - } - case IR::Opcode::A64SetNZCV: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); - break; - } - case IR::Opcode::A64SetNZCVRaw: { - do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); - break; - } - default: { - if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { - nzcv_info = {}; - } - if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { - reg_info = {}; - vec_info = {}; - sp_info = {}; - } - break; - } - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp b/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp deleted file mode 100644 index 25b7ef0ff1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/a64_merge_interpret_blocks.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2018 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/frontend/A64/a64_location_descriptor.h" -#include "dynarmic/frontend/A64/translate/a64_translate.h" -#include "dynarmic/interface/A64/config.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { - const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { - const auto instruction = cb->MemoryReadCode(location.PC()); - if (!instruction) - return false; - - IR::Block new_block{location}; - A64::TranslateSingleInstruction(new_block, location, *instruction); - - if (!new_block.Instructions().empty()) - return false; - - const IR::Terminal terminal = new_block.GetTerminal(); - if (auto term = boost::get(&terminal)) { - return term->next == location; - } - - return false; - }; - - IR::Terminal terminal = block.GetTerminal(); - auto term = boost::get(&terminal); - if (!term) - return; - - A64::LocationDescriptor location{term->next}; - size_t num_instructions = 1; - - while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { - num_instructions++; - } - - term->num_instructions = num_instructions; - block.ReplaceTerminal(terminal); - block.CycleCount() += num_instructions - 1; -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp deleted file mode 100644 index 86ebca87d2..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp +++ /dev/null @@ -1,559 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/common/assert.h" -#include -#include -#include "dynarmic/common/common_types.h" - -#include "dynarmic/common/safe_ops.h" -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -using Op = Dynarmic::IR::Opcode; - -namespace { - -// Tiny helper to avoid the need to store based off the opcode -// bit size all over the place within folding functions. -void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { - if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); - } else { - inst.ReplaceUsesWith(IR::Value{value}); - } -} - -IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; -} - -template -bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - - const bool is_lhs_immediate = lhs.IsImmediate(); - const bool is_rhs_immediate = rhs.IsImmediate(); - - if (is_lhs_immediate && is_rhs_immediate) { - const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); - ReplaceUsesWith(inst, is_32_bit, result); - return false; - } - - if (is_lhs_immediate && !is_rhs_immediate) { - const IR::Inst* rhs_inst = rhs.GetInstRecursive(); - if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { - const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, rhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } else { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - } - } - - if (!is_lhs_immediate && is_rhs_immediate) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { - 
const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - } - } - - return true; -} - -void FoldAdd(IR::Inst& inst, bool is_32_bit) { - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - if (lhs.IsImmediate() && !rhs.IsImmediate()) { - // Normalize - inst.SetArg(0, rhs); - inst.SetArg(1, lhs); - FoldAdd(inst, is_32_bit); - return; - } - - if (inst.HasAssociatedPseudoOperation()) { - return; - } - - if (!lhs.IsImmediate() && rhs.IsImmediate()) { - const IR::Inst* lhs_inst = lhs.GetInstRecursive(); - if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { - const u64 combined = rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); - if (combined == 0) { - inst.ReplaceUsesWith(lhs_inst->GetArg(0)); - return; - } - inst.SetArg(0, lhs_inst->GetArg(0)); - inst.SetArg(1, Value(is_32_bit, combined)); - return; - } - if (rhs.IsZero() && carry.IsZero()) { - inst.ReplaceUsesWith(lhs); - return; - } - } - - if (inst.AreAllArgsImmediates()) { - const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); - return; - } -} - -/// Folds AND operations based on the following: -/// -/// 1. imm_x & imm_y -> result -/// 2. x & 0 -> 0 -/// 3. 0 & y -> 0 -/// 4. x & y -> y (where x has all bits set to 1) -/// 5. x & y -> x (where y has all bits set to 1) -/// -void FoldAND(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.HasAllBitsSet()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -/// Folds byte reversal opcodes based on the following: -/// -/// 1. 
imm -> swap(imm) -/// -void FoldByteReverse(IR::Inst& inst, Op op) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - if (op == Op::ByteReverseWord) { - const u32 result = mcl::bit::swap_bytes_32(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else if (op == Op::ByteReverseHalf) { - const u16 result = mcl::bit::swap_bytes_16(static_cast(operand.GetImmediateAsU64())); - inst.ReplaceUsesWith(IR::Value{result}); - } else { - const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); - inst.ReplaceUsesWith(IR::Value{result}); - } -} - -/// Folds division operations based on the following: -/// -/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x -/// -void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { - const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { - if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); - } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); - } - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(IR::Value{lhs}); - } -} - -// Folds EOR operations based on the following: -// -// 1. imm_x ^ imm_y -> result -// 2. x ^ 0 -> x -// 3. 
0 ^ y -> y -// -void FoldEOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -void FoldLeastSignificantByte(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantHalf(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldLeastSignificantWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); -} - -void FoldMostSignificantBit(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); -} - -void FoldMostSignificantWord(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - if (!inst.AreAllArgsImmediates()) { - return; - } - - const auto operand = inst.GetArg(0); - if (carry_inst) { - carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); - } - inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); -} - -// Folds multiplication operations based on the following: -// -// 1. imm_x * imm_y -> result -// 2. x * 0 -> 0 -// 3. 0 * y -> 0 -// 4. x * 1 -> x -// 5. 
1 * y -> y -// -void FoldMultiply(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - } else if (rhs.IsUnsignedImmediate(1)) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -// Folds NOT operations if the contained value is an immediate. -void FoldNOT(IR::Inst& inst, bool is_32_bit) { - const auto operand = inst.GetArg(0); - - if (!operand.IsImmediate()) { - return; - } - - const u64 result = ~operand.GetImmediateAsU64(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -// Folds OR operations based on the following: -// -// 1. imm_x | imm_y -> result -// 2. x | 0 -> x -// 3. 0 | y -> y -// -void FoldOR(IR::Inst& inst, bool is_32_bit) { - if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { - const auto rhs = inst.GetArg(1); - if (rhs.IsZero()) { - inst.ReplaceUsesWith(inst.GetArg(0)); - } - } -} - -bool FoldShifts(IR::Inst& inst) { - IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); - - // The 32-bit variants can contain 3 arguments, while the - // 64-bit variants only contain 2. 
- if (inst.NumArgs() == 3 && !carry_inst) { - inst.SetArg(2, IR::Value(false)); - } - - const auto shift_amount = inst.GetArg(1); - - if (shift_amount.IsZero()) { - if (carry_inst) { - carry_inst->ReplaceUsesWith(inst.GetArg(2)); - } - inst.ReplaceUsesWith(inst.GetArg(0)); - return false; - } - - if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { - inst.SetArg(2, IR::Value(false)); - } - - if (!inst.AreAllArgsImmediates() || carry_inst) { - return false; - } - - return true; -} - -void FoldSignExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSignExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const s64 value = inst.GetArg(0).GetImmediateAsS64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldSub(IR::Inst& inst, bool is_32_bit) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - - const auto lhs = inst.GetArg(0); - const auto rhs = inst.GetArg(1); - const auto carry = inst.GetArg(2); - - const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); - ReplaceUsesWith(inst, is_32_bit, result); -} - -void FoldZeroExtendXToWord(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); -} - -void FoldZeroExtendXToLong(IR::Inst& inst) { - if (!inst.AreAllArgsImmediates()) { - return; - } - - const u64 value = inst.GetArg(0).GetImmediateAsU64(); - inst.ReplaceUsesWith(IR::Value{value}); -} -} // Anonymous namespace - -void ConstantPropagation(IR::Block& block) { - for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - - switch (opcode) { - case Op::LeastSignificantWord: - FoldLeastSignificantWord(inst); - break; - 
case Op::MostSignificantWord: - FoldMostSignificantWord(inst); - break; - case Op::LeastSignificantHalf: - FoldLeastSignificantHalf(inst); - break; - case Op::LeastSignificantByte: - FoldLeastSignificantByte(inst); - break; - case Op::MostSignificantBit: - FoldMostSignificantBit(inst); - break; - case Op::IsZero32: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); - } - break; - case Op::IsZero64: - if (inst.AreAllArgsImmediates()) { - inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); - } - break; - case Op::LogicalShiftLeft32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftLeft64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::LogicalShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::ArithmeticShiftRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight32: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); - } - break; - case Op::RotateRight64: - if (FoldShifts(inst)) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); - } - break; - case 
Op::LogicalShiftLeftMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftLeftMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::LogicalShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::LogicalShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::ArithmeticShiftRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); - } - break; - case Op::ArithmeticShiftRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); - } - break; - case Op::RotateRightMasked32: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); - } - break; - case Op::RotateRightMasked64: - if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); - } - break; - case Op::Add32: - case Op::Add64: - FoldAdd(inst, opcode == Op::Add32); - break; - case Op::Sub32: - case Op::Sub64: - FoldSub(inst, opcode == Op::Sub32); - break; - case Op::Mul32: - case Op::Mul64: - FoldMultiply(inst, opcode == Op::Mul32); - break; - case Op::SignedDiv32: - case Op::SignedDiv64: - FoldDivide(inst, opcode == Op::SignedDiv32, true); - break; - case Op::UnsignedDiv32: - case Op::UnsignedDiv64: - FoldDivide(inst, opcode == Op::UnsignedDiv32, false); - break; - case Op::And32: - case Op::And64: - 
FoldAND(inst, opcode == Op::And32); - break; - case Op::Eor32: - case Op::Eor64: - FoldEOR(inst, opcode == Op::Eor32); - break; - case Op::Or32: - case Op::Or64: - FoldOR(inst, opcode == Op::Or32); - break; - case Op::Not32: - case Op::Not64: - FoldNOT(inst, opcode == Op::Not32); - break; - case Op::SignExtendByteToWord: - case Op::SignExtendHalfToWord: - FoldSignExtendXToWord(inst); - break; - case Op::SignExtendByteToLong: - case Op::SignExtendHalfToLong: - case Op::SignExtendWordToLong: - FoldSignExtendXToLong(inst); - break; - case Op::ZeroExtendByteToWord: - case Op::ZeroExtendHalfToWord: - FoldZeroExtendXToWord(inst); - break; - case Op::ZeroExtendByteToLong: - case Op::ZeroExtendHalfToLong: - case Op::ZeroExtendWordToLong: - FoldZeroExtendXToLong(inst); - break; - case Op::ByteReverseWord: - case Op::ByteReverseHalf: - case Op::ByteReverseDual: - FoldByteReverse(inst, opcode); - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp deleted file mode 100644 index bda9f6efd1..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/dead_code_elimination_pass.cpp +++ /dev/null @@ -1,23 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void DeadCodeElimination(IR::Block& block) { - // We iterate over the instructions in reverse order. - // This is because removing an instruction reduces the number of uses for earlier instructions. 
- for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp deleted file mode 100644 index e87fcc335b..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -void IdentityRemovalPass(IR::Block& block) { - std::vector to_invalidate; - - auto iter = block.begin(); - while (iter != block.end()) { - IR::Inst& inst = *iter; - - const size_t num_args = inst.NumArgs(); - for (size_t i = 0; i < num_args; i++) { - while (true) { - IR::Value arg = inst.GetArg(i); - if (!arg.IsIdentity()) - break; - inst.SetArg(i, arg.GetInst()->GetArg(0)); - } - } - - if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { - iter = block.Instructions().erase(inst); - to_invalidate.push_back(&inst); - } else { - ++iter; - } - } - - for (IR::Inst* inst : to_invalidate) { - inst->Invalidate(); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h b/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h deleted file mode 100644 index 5eb1a55100..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/ir_matcher.h +++ /dev/null @@ -1,127 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2020 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/value.h" - -namespace Dynarmic::Optimization::IRMatcher { - -struct CaptureValue { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value); - } -}; - -struct CaptureInst { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return std::tuple(value.GetInstRecursive()); - } -}; - -struct CaptureUImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsU64()); - } -}; - -struct CaptureSImm { - using ReturnType = std::tuple; - - static std::optional Match(IR::Value value) { - return std::tuple(value.GetImmediateAsS64()); - } -}; - -template -struct UImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsU64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct SImm { - using ReturnType = std::tuple<>; - - static std::optional> Match(IR::Value value) { - if (value.GetImmediateAsS64() == Value) - return std::tuple(); - return std::nullopt; - } -}; - -template -struct Inst { -public: - using ReturnType = mp::concat, typename Args::ReturnType...>; - - static std::optional Match(const IR::Inst& inst) { - if (inst.GetOpcode() != Opcode) - return std::nullopt; - if (inst.HasAssociatedPseudoOperation()) - return std::nullopt; - return MatchArgs<0>(inst); - } - - static std::optional Match(IR::Value value) { - if (value.IsImmediate()) - return std::nullopt; - return Match(*value.GetInstRecursive()); - } - -private: - template - static auto MatchArgs(const IR::Inst& inst) -> std::optional>, 
std::tuple<>>>> { - if constexpr (I >= sizeof...(Args)) { - return std::tuple(); - } else { - using Arg = mp::get>; - - if (const auto arg = Arg::Match(inst.GetArg(I))) { - if (const auto rest = MatchArgs(inst)) { - return std::tuple_cat(*arg, *rest); - } - } - - return std::nullopt; - } - } -}; - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t); -} - -inline bool IsSameInst(std::tuple t) { - return std::get<0>(t) == std::get<1>(t) && std::get<0>(t) == std::get<2>(t); -} - -} // namespace Dynarmic::Optimization::IRMatcher diff --git a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp deleted file mode 100644 index a766bdc83f..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/naming_pass.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2023 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" - -namespace Dynarmic::Optimization { - -void NamingPass(IR::Block& block) { - unsigned name = 1; - for (auto& inst : block) { - inst.SetName(name++); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/passes.h b/src/dynarmic/src/dynarmic/ir/opt/passes.h deleted file mode 100644 index 703145b556..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/passes.h +++ /dev/null @@ -1,47 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -namespace Dynarmic::A32 { -struct UserCallbacks; -} - -namespace Dynarmic::A64 { -struct UserCallbacks; -struct UserConfig; -} // namespace Dynarmic::A64 - -namespace Dynarmic::IR { -class Block; -} - -namespace Dynarmic::Optimization { - -struct PolyfillOptions { - bool sha256 = false; - bool vector_multiply_widen = false; - - bool operator==(const PolyfillOptions&) const = default; -}; - -struct A32GetSetEliminationOptions { - bool convert_nzc_to_nz = false; - bool convert_nz_to_nzc = false; -}; - -void PolyfillPass(IR::Block& block, const PolyfillOptions& opt); -void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb); -void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions opt); -void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf); -void A64GetSetElimination(IR::Block& block); -void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb); -void ConstantPropagation(IR::Block& block); -void DeadCodeElimination(IR::Block& block); -void IdentityRemovalPass(IR::Block& block); -void VerificationPass(const IR::Block& block); -void NamingPass(IR::Block& block); - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp deleted file mode 100644 index 1aa3aea91e..0000000000 --- a/src/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* This file is part of the dynarmic project. 
- * Copyright (c) 2022 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/ir_emitter.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" - -namespace Dynarmic::Optimization { - -namespace { - -void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - - const IR::U128 t = ir.VectorExtract(x, y, 32); - - IR::U128 result = ir.ZeroVector(); - for (size_t i = 0; i < 4; i++) { - const IR::U32 modified_element = [&] { - const IR::U32 element = ir.VectorGetElement(32, t, i); - const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); - const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); - const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); - }(); - - result = ir.VectorSetElement(32, result, i, modified_element); - } - result = ir.VectorAdd(32, result, x); - - inst.ReplaceUsesWith(result); -} - -void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { - const IR::U128 x = (IR::U128)inst.GetArg(0); - const IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 z = (IR::U128)inst.GetArg(2); - - const IR::U128 T0 = ir.VectorExtract(y, z, 32); - - const IR::U128 lower_half = [&] { - const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); - const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); - const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); - return ir.VectorZeroUpper(tmp5); - }(); - - const IR::U64 upper_half = [&] { - const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); - const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); - 
const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); - const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); - - // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] - const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); - const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); - - const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); - return ir.VectorGetElement(64, tmp5, 0); - }(); - - const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); - - inst.ReplaceUsesWith(result); -} - -IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Eor(ir.And(ir.Eor(y, z), x), z); -} - -IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { - return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); -} - -IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { - const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); - const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11)); - const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); - - return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); -} - -void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 x = (IR::U128)inst.GetArg(0); - IR::U128 y = (IR::U128)inst.GetArg(1); - const IR::U128 w = (IR::U128)inst.GetArg(2); - const bool part1 = inst.GetArg(3).GetU1(); - - for (size_t i = 0; i < 4; i++) { - const IR::U32 low_x = ir.VectorGetElement(32, x, 0); - const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); - const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); - const IR::U32 high_x = ir.VectorGetElement(32, x, 3); - - const IR::U32 low_y = ir.VectorGetElement(32, y, 0); - const 
IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); - const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); - const IR::U32 high_y = ir.VectorGetElement(32, y, 3); - - const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); - const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); - - const IR::U32 t = [&] { - const IR::U32 w_element = ir.VectorGetElement(32, w, i); - const IR::U32 sig = SHAhashSIGMA1(ir, low_y); - - return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); - }(); - - const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); - const IR::U32 new_low_y = ir.Add(t, high_x); - - // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] - const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); - const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); - - x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); - y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); - } - - inst.ReplaceUsesWith(part1 ? x : y); -} - -template -void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { - IR::U128 n = (IR::U128)inst.GetArg(0); - IR::U128 m = (IR::U128)inst.GetArg(1); - - const IR::U128 wide_n = is_signed ? ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); - const IR::U128 wide_m = is_signed ? 
ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); - - const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); - - inst.ReplaceUsesWith(result); -} - -} // namespace - -void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { - if (polyfill == PolyfillOptions{}) { - return; - } - - IR::IREmitter ir{block}; - - for (auto& inst : block) { - ir.SetInsertionPointBefore(&inst); - - switch (inst.GetOpcode()) { - case IR::Opcode::SHA256MessageSchedule0: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule0(ir, inst); - } - break; - case IR::Opcode::SHA256MessageSchedule1: - if (polyfill.sha256) { - PolyfillSHA256MessageSchedule1(ir, inst); - } - break; - case IR::Opcode::SHA256Hash: - if (polyfill.sha256) { - PolyfillSHA256Hash(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplySignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, true>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden8: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<8, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden16: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<16, false>(ir, inst); - } - break; - case IR::Opcode::VectorMultiplyUnsignedWiden32: - if (polyfill.vector_multiply_widen) { - PolyfillVectorMultiplyWiden<32, false>(ir, inst); - } - break; - default: - break; - } - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp b/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp deleted file mode 100644 index c6c2cff231..0000000000 --- 
a/src/dynarmic/src/dynarmic/ir/opt/verification_pass.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include -#include - -#include "dynarmic/common/assert.h" -#include "dynarmic/common/common_types.h" -#include - -#include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/microinstruction.h" -#include "dynarmic/ir/opcodes.h" -#include "dynarmic/ir/opt/passes.h" -#include "dynarmic/ir/type.h" - -namespace Dynarmic::Optimization { - -void VerificationPass(const IR::Block& block) { - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const IR::Type t1 = inst.GetArg(i).GetType(); - const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); - if (!IR::AreTypesCompatible(t1, t2)) { - std::puts(IR::DumpBlock(block).c_str()); - ASSERT_FALSE("above block failed validation"); - } - } - } - - ankerl::unordered_dense::map actual_uses; - for (const auto& inst : block) { - for (size_t i = 0; i < inst.NumArgs(); i++) { - const auto arg = inst.GetArg(i); - if (!arg.IsImmediate()) { - actual_uses[arg.GetInst()]++; - } - } - } - - for (const auto& pair : actual_uses) { - ASSERT(pair.first->UseCount() == pair.second); - } -} - -} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp new file mode 100644 index 0000000000..383b915839 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -0,0 +1,1502 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include +#include + +#include +#include "boost/container/small_vector.hpp" +#include "dynarmic/frontend/A32/a32_ir_emitter.h" +#include "dynarmic/frontend/A32/a32_location_descriptor.h" +#include "dynarmic/frontend/A32/a32_types.h" +#include "dynarmic/frontend/A64/a64_ir_emitter.h" +#include "dynarmic/frontend/A64/a64_location_descriptor.h" +#include "dynarmic/frontend/A64/translate/a64_translate.h" +#include "dynarmic/interface/A32/config.h" +#include "dynarmic/interface/A64/config.h" +#include "dynarmic/interface/optimization_flags.h" +#include "dynarmic/common/safe_ops.h" +#include "dynarmic/ir/basic_block.h" +#include "dynarmic/ir/microinstruction.h" +#include "dynarmic/ir/opcodes.h" +#include "dynarmic/ir/opt_passes.h" +#include "dynarmic/ir/type.h" +#include "mcl/bit/swap.hpp" +#include "mcl/bit/rotate.hpp" +#include "mcl/iterator/reverse.hpp" + +namespace Dynarmic::Optimization { + +static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { + for (auto& inst : block) { + switch (inst.GetOpcode()) { + case IR::Opcode::A32ReadMemory8: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory16: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + case IR::Opcode::A32ReadMemory32: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); 
+ } + break; + } + case IR::Opcode::A32ReadMemory64: { + if (!inst.AreAllArgsImmediates()) { + break; + } + + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } + break; + } + default: + break; + } + } +} + +static void FlagsPass(IR::Block& block) { + using Iterator = std::reverse_iterator; + + struct FlagInfo { + bool set_not_required = false; + bool has_value_request = false; + Iterator value_request = {}; + }; + struct ValuelessFlagInfo { + bool set_not_required = false; + }; + ValuelessFlagInfo nzcvq; + ValuelessFlagInfo nzcv; + ValuelessFlagInfo nz; + FlagInfo c_flag; + FlagInfo ge; + + auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(value); + } + info.has_value_request = false; + + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) { + if (info.set_not_required) { + inst->Invalidate(); + } + info.set_not_required = true; + }; + + auto do_get = [](FlagInfo& info, Iterator inst) { + if (info.has_value_request) { + info.value_request->ReplaceUsesWith(IR::Value{&*inst}); + } + info.has_value_request = true; + info.value_request = inst; + }; + + A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.rbegin(); inst != block.rend(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetCFlag: { + do_get(c_flag, inst); + break; + } + case IR::Opcode::A32SetCpsrNZCV: { + if (c_flag.has_value_request) { + ir.SetInsertionPointBefore(inst.base()); // base is one ahead + IR::U1 c = ir.GetCFlagFromNZCV(IR::NZCV{inst->GetArg(0)}); + c_flag.value_request->ReplaceUsesWith(c); + c_flag.has_value_request = false; + break; // This case will be executed 
again because of the above + } + + do_set_valueless(nzcv, inst); + + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVRaw: { + if (c_flag.has_value_request) { + nzcv.set_not_required = false; + } + + do_set_valueless(nzcv, inst); + + nzcvq = {}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZCVQ: { + if (c_flag.has_value_request) { + nzcvq.set_not_required = false; + } + + do_set_valueless(nzcvq, inst); + + nzcv = {.set_not_required = true}; + nz = {.set_not_required = true}; + c_flag = {.set_not_required = true}; + break; + } + case IR::Opcode::A32SetCpsrNZ: { + do_set_valueless(nz, inst); + + nzcvq = {}; + nzcv = {}; + break; + } + case IR::Opcode::A32SetCpsrNZC: { + if (c_flag.has_value_request) { + c_flag.value_request->ReplaceUsesWith(inst->GetArg(1)); + c_flag.has_value_request = false; + } + + if (!inst->GetArg(1).IsImmediate() && inst->GetArg(1).GetInstRecursive()->GetOpcode() == IR::Opcode::A32GetCFlag) { + const auto nz_value = inst->GetArg(0); + + inst->Invalidate(); + + ir.SetInsertionPointBefore(inst.base()); + ir.SetCpsrNZ(IR::NZCV{nz_value}); + + nzcvq = {}; + nzcv = {}; + nz = {.set_not_required = true}; + break; + } + + if (nz.set_not_required && c_flag.set_not_required) { + inst->Invalidate(); + } else if (nz.set_not_required) { + inst->SetArg(0, IR::Value::EmptyNZCVImmediateMarker()); + } + nz.set_not_required = true; + c_flag.set_not_required = true; + + nzcv = {}; + nzcvq = {}; + break; + } + case IR::Opcode::A32SetGEFlags: { + do_set(ge, inst->GetArg(0), inst); + break; + } + case IR::Opcode::A32GetGEFlags: { + do_get(ge, inst); + break; + } + case IR::Opcode::A32SetGEFlagsCompressed: { + ge = {.set_not_required = true}; + break; + } + case IR::Opcode::A32OrQFlag: { + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcvq = {}; + nzcv = {}; + nz = {}; + c_flag = {}; + ge = 
{}; + } + break; + } + } + } +} + +static void RegisterPass(IR::Block& block) { + using Iterator = IR::Block::iterator; + + struct RegInfo { + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array reg_info; + + const auto do_get = [](RegInfo& info, Iterator get_inst) { + if (info.register_value.IsEmpty()) { + info.register_value = IR::Value(&*get_inst); + return; + } + get_inst->ReplaceUsesWith(info.register_value); + }; + + const auto do_set = [](RegInfo& info, IR::Value value, Iterator set_inst) { + if (info.last_set_instruction) { + (*info.last_set_instruction)->Invalidate(); + } + info = { + .register_value = value, + .last_set_instruction = set_inst, + }; + }; + + enum class ExtValueType { + Empty, + Single, + Double, + VectorDouble, + VectorQuad, + }; + struct ExtRegInfo { + ExtValueType value_type = {}; + IR::Value register_value; + std::optional last_set_instruction; + }; + std::array ext_reg_info; + + const auto do_ext_get = [](ExtValueType type, std::initializer_list> infos, Iterator get_inst) { + if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = IR::Value(&*get_inst), + .last_set_instruction = std::nullopt, + }; + } + return; + } + get_inst->ReplaceUsesWith(std::data(infos)[0].get().register_value); + }; + + const auto do_ext_set = [](ExtValueType type, std::initializer_list> infos, IR::Value value, Iterator set_inst) { + if (std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) { + if (std::data(infos)[0].get().last_set_instruction) { + (*std::data(infos)[0].get().last_set_instruction)->Invalidate(); + } + } + for (auto& info : infos) { + info.get() = { + .value_type = type, + .register_value = value, + .last_set_instruction = set_inst, + }; + } + }; + + // Location and version don't matter here. 
+ A32::IREmitter ir{block, A32::LocationDescriptor{block.Location()}, {}}; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + case IR::Opcode::A32GetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + ASSERT(reg != A32::Reg::PC); + const size_t reg_index = size_t(reg); + do_get(reg_info[reg_index], inst); + break; + } + case IR::Opcode::A32SetRegister: { + const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); + if (reg == A32::Reg::PC) { + break; + } + const auto reg_index = size_t(reg); + do_set(reg_info[reg_index], inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Single, {ext_reg_info[reg_index]}, inst); + break; + } + case IR::Opcode::A32SetExtendedRegister32: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Single, {ext_reg_info[reg_index]}, inst->GetArg(1), inst); + break; + } + case IR::Opcode::A32GetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_get(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + break; + } + case IR::Opcode::A32SetExtendedRegister64: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + do_ext_set(ExtValueType::Double, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst->GetArg(1), + inst); + break; + } + case IR::Opcode::A32GetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + do_ext_get(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index 
* 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_get(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst); + } + break; + } + case IR::Opcode::A32SetVector: { + const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); + const size_t reg_index = A32::RegNumber(reg); + if (A32::IsDoubleExtReg(reg)) { + ir.SetInsertionPointAfter(inst); + const IR::U128 stored_value = ir.VectorZeroUpper(IR::U128{inst->GetArg(1)}); + do_ext_set(ExtValueType::VectorDouble, + { + ext_reg_info[reg_index * 2 + 0], + ext_reg_info[reg_index * 2 + 1], + }, + stored_value, + inst); + } else { + DEBUG_ASSERT(A32::IsQuadExtReg(reg)); + do_ext_set(ExtValueType::VectorQuad, + { + ext_reg_info[reg_index * 4 + 0], + ext_reg_info[reg_index * 4 + 1], + ext_reg_info[reg_index * 4 + 2], + ext_reg_info[reg_index * 4 + 3], + }, + inst->GetArg(1), + inst); + } + break; + } + default: { + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + ext_reg_info = {}; + } + break; + } + } + } +} + +struct A32GetSetEliminationOptions { + bool convert_nzc_to_nz = false; + bool convert_nz_to_nzc = false; +}; + +static void A32GetSetElimination(IR::Block& block, A32GetSetEliminationOptions) { + FlagsPass(block); + RegisterPass(block); +} + +static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) { + if (conf.hook_data_cache_operations) { + return; + } + + for (auto& inst : block) { + if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) { + continue; + } + + const auto op = static_cast(inst.GetArg(1).GetU64()); + if (op == A64::DataCacheOperation::ZeroByVA) { + A64::IREmitter ir{block}; + ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}}; + ir.SetInsertionPointBefore(&inst); + + size_t bytes = 4 << 
static_cast(conf.dczid_el0 & 0b1111); + IR::U64 addr{inst.GetArg(2)}; + + const IR::U128 zero_u128 = ir.ZeroExtendToQuad(ir.Imm64(0)); + while (bytes >= 16) { + ir.WriteMemory128(addr, zero_u128, IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(16)); + bytes -= 16; + } + + while (bytes >= 8) { + ir.WriteMemory64(addr, ir.Imm64(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(8)); + bytes -= 8; + } + + while (bytes >= 4) { + ir.WriteMemory32(addr, ir.Imm32(0), IR::AccType::DCZVA); + addr = ir.Add(addr, ir.Imm64(4)); + bytes -= 4; + } + } + inst.Invalidate(); + } +} + +static void A64GetSetElimination(IR::Block& block) { + using Iterator = IR::Block::iterator; + + enum class TrackingType { + W, + X, + S, + D, + Q, + SP, + NZCV, + NZCVRaw, + }; + struct RegisterInfo { + IR::Value register_value; + TrackingType tracking_type; + bool set_instruction_present = false; + Iterator last_set_instruction; + }; + std::array reg_info; + std::array vec_info; + RegisterInfo sp_info; + RegisterInfo nzcv_info; + + const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) { + if (info.set_instruction_present) { + info.last_set_instruction->Invalidate(); + block.Instructions().erase(info.last_set_instruction); + } + + info.register_value = value; + info.tracking_type = tracking_type; + info.set_instruction_present = true; + info.last_set_instruction = set_inst; + }; + + const auto do_get = [](RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { + const auto do_nothing = [&] { + info = {}; + info.register_value = IR::Value(&*get_inst); + info.tracking_type = tracking_type; + }; + + if (info.register_value.IsEmpty()) { + do_nothing(); + return; + } + + if (info.tracking_type == tracking_type) { + get_inst->ReplaceUsesWith(info.register_value); + return; + } + + do_nothing(); + }; + + for (auto inst = block.begin(); inst != block.end(); ++inst) { + auto const opcode = inst->GetOpcode(); + switch (opcode) { + 
case IR::Opcode::A64GetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::W); + break; + } + case IR::Opcode::A64GetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_get(reg_info.at(index), inst, TrackingType::X); + break; + } + case IR::Opcode::A64GetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::S); + break; + } + case IR::Opcode::A64GetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::D); + break; + } + case IR::Opcode::A64GetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_get(vec_info.at(index), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64GetSP: { + do_get(sp_info, inst, TrackingType::SP); + break; + } + case IR::Opcode::A64GetNZCVRaw: { + do_get(nzcv_info, inst, TrackingType::NZCVRaw); + break; + } + case IR::Opcode::A64SetW: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::W); + break; + } + case IR::Opcode::A64SetX: { + const size_t index = A64::RegNumber(inst->GetArg(0).GetA64RegRef()); + do_set(reg_info.at(index), inst->GetArg(1), inst, TrackingType::X); + break; + } + case IR::Opcode::A64SetS: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::S); + break; + } + case IR::Opcode::A64SetD: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::D); + break; + } + case IR::Opcode::A64SetQ: { + const size_t index = A64::VecNumber(inst->GetArg(0).GetA64VecRef()); + do_set(vec_info.at(index), inst->GetArg(1), inst, TrackingType::Q); + break; + } + case IR::Opcode::A64SetSP: { + do_set(sp_info, 
inst->GetArg(0), inst, TrackingType::SP); + break; + } + case IR::Opcode::A64SetNZCV: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCV); + break; + } + case IR::Opcode::A64SetNZCVRaw: { + do_set(nzcv_info, inst->GetArg(0), inst, TrackingType::NZCVRaw); + break; + } + default: { + if (ReadsFromCPSR(opcode) || WritesToCPSR(opcode)) { + nzcv_info = {}; + } + if (ReadsFromCoreRegister(opcode) || WritesToCoreRegister(opcode)) { + reg_info = {}; + vec_info = {}; + sp_info = {}; + } + break; + } + } + } +} + +static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb) { + const auto is_interpret_instruction = [cb](A64::LocationDescriptor location) { + const auto instruction = cb->MemoryReadCode(location.PC()); + if (!instruction) + return false; + + IR::Block new_block{location}; + A64::TranslateSingleInstruction(new_block, location, *instruction); + + if (!new_block.Instructions().empty()) + return false; + + const IR::Terminal terminal = new_block.GetTerminal(); + if (auto term = boost::get(&terminal)) { + return term->next == location; + } + + return false; + }; + + IR::Terminal terminal = block.GetTerminal(); + auto term = boost::get(&terminal); + if (!term) + return; + + A64::LocationDescriptor location{term->next}; + size_t num_instructions = 1; + + while (is_interpret_instruction(location.AdvancePC(static_cast(num_instructions * 4)))) { + num_instructions++; + } + + term->num_instructions = num_instructions; + block.ReplaceTerminal(terminal); + block.CycleCount() += num_instructions - 1; +} + +using Op = Dynarmic::IR::Opcode; + +// Tiny helper to avoid the need to store based off the opcode +// bit size all over the place within folding functions. +static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { + if (is_32_bit) { + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + } else { + inst.ReplaceUsesWith(IR::Value{value}); + } +} + +static IR::Value Value(bool is_32_bit, u64 value) { + return is_32_bit ? 
IR::Value{static_cast(value)} : IR::Value{value}; +} + +template +static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + + const bool is_lhs_immediate = lhs.IsImmediate(); + const bool is_rhs_immediate = rhs.IsImmediate(); + + if (is_lhs_immediate && is_rhs_immediate) { + const u64 result = imm_fn(lhs.GetImmediateAsU64(), rhs.GetImmediateAsU64()); + ReplaceUsesWith(inst, is_32_bit, result); + return false; + } + + if (is_lhs_immediate && !is_rhs_immediate) { + const IR::Inst* rhs_inst = rhs.GetInstRecursive(); + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(lhs.GetImmediateAsU64(), rhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, rhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } else { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + } + } + + if (!is_lhs_immediate && is_rhs_immediate) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate()) { + const u64 combined = imm_fn(rhs.GetImmediateAsU64(), lhs_inst->GetArg(1).GetImmediateAsU64()); + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + } + } + + return true; +} + +static void FoldAdd(IR::Inst& inst, bool is_32_bit) { + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + if (lhs.IsImmediate() && !rhs.IsImmediate()) { + // Normalize + inst.SetArg(0, rhs); + inst.SetArg(1, lhs); + FoldAdd(inst, is_32_bit); + return; + } + + if (inst.HasAssociatedPseudoOperation()) { + return; + } + + if (!lhs.IsImmediate() && rhs.IsImmediate()) { + const IR::Inst* lhs_inst = lhs.GetInstRecursive(); + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->GetArg(1).IsImmediate() && lhs_inst->GetArg(2).IsImmediate()) { + const u64 combined = 
rhs.GetImmediateAsU64() + lhs_inst->GetArg(1).GetImmediateAsU64() + lhs_inst->GetArg(2).GetU1(); + if (combined == 0) { + inst.ReplaceUsesWith(lhs_inst->GetArg(0)); + return; + } + inst.SetArg(0, lhs_inst->GetArg(0)); + inst.SetArg(1, Value(is_32_bit, combined)); + return; + } + if (rhs.IsZero() && carry.IsZero()) { + inst.ReplaceUsesWith(lhs); + return; + } + } + + if (inst.AreAllArgsImmediates()) { + const u64 result = lhs.GetImmediateAsU64() + rhs.GetImmediateAsU64() + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); + return; + } +} + +/// Folds AND operations based on the following: +/// +/// 1. imm_x & imm_y -> result +/// 2. x & 0 -> 0 +/// 3. 0 & y -> 0 +/// 4. x & y -> y (where x has all bits set to 1) +/// 5. x & y -> x (where y has all bits set to 1) +/// +static void FoldAND(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.HasAllBitsSet()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +/// Folds byte reversal opcodes based on the following: +/// +/// 1. imm -> swap(imm) +/// +static void FoldByteReverse(IR::Inst& inst, Op op) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + if (op == Op::ByteReverseWord) { + const u32 result = mcl::bit::swap_bytes_32(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else if (op == Op::ByteReverseHalf) { + const u16 result = mcl::bit::swap_bytes_16(u16(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = mcl::bit::swap_bytes_64(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } +} + +/// Folds division operations based on the following: +/// +/// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) +/// 2. 
imm_x / imm_y -> result +/// 3. x / 1 -> x +/// +static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { + const auto rhs = inst.GetArg(1); + + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + return; + } + + const auto lhs = inst.GetArg(0); + if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (is_signed) { + const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); + ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + } else { + const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); + } + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(IR::Value{lhs}); + } +} + +// Folds EOR operations based on the following: +// +// 1. imm_x ^ imm_y -> result +// 2. x ^ 0 -> x +// 3. 0 ^ y -> y +// +static void FoldEOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static void FoldLeastSignificantByte(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantHalf(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldLeastSignificantWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64())}); +} + +static void FoldMostSignificantBit(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0}); +} + +static void 
FoldMostSignificantWord(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + if (!inst.AreAllArgsImmediates()) { + return; + } + + const auto operand = inst.GetArg(0); + if (carry_inst) { + carry_inst->ReplaceUsesWith(IR::Value{mcl::bit::get_bit<31>(operand.GetImmediateAsU64())}); + } + inst.ReplaceUsesWith(IR::Value{static_cast(operand.GetImmediateAsU64() >> 32)}); +} + +// Folds multiplication operations based on the following: +// +// 1. imm_x * imm_y -> result +// 2. x * 0 -> 0 +// 3. 0 * y -> 0 +// 4. x * 1 -> x +// 5. 1 * y -> y +// +static void FoldMultiply(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, 0); + } else if (rhs.IsUnsignedImmediate(1)) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +// Folds NOT operations if the contained value is an immediate. +static void FoldNOT(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + + if (!operand.IsImmediate()) { + return; + } + + const u64 result = ~operand.GetImmediateAsU64(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +// Folds OR operations based on the following: +// +// 1. imm_x | imm_y -> result +// 2. x | 0 -> x +// 3. 0 | y -> y +// +static void FoldOR(IR::Inst& inst, bool is_32_bit) { + if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) { + const auto rhs = inst.GetArg(1); + if (rhs.IsZero()) { + inst.ReplaceUsesWith(inst.GetArg(0)); + } + } +} + +static bool FoldShifts(IR::Inst& inst) { + IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp); + + // The 32-bit variants can contain 3 arguments, while the + // 64-bit variants only contain 2. 
+ if (inst.NumArgs() == 3 && !carry_inst) { + inst.SetArg(2, IR::Value(false)); + } + + const auto shift_amount = inst.GetArg(1); + + if (shift_amount.IsZero()) { + if (carry_inst) { + carry_inst->ReplaceUsesWith(inst.GetArg(2)); + } + inst.ReplaceUsesWith(inst.GetArg(0)); + return false; + } + + if (inst.NumArgs() == 3 && shift_amount.IsImmediate() && !shift_amount.IsZero()) { + inst.SetArg(2, IR::Value(false)); + } + + if (!inst.AreAllArgsImmediates() || carry_inst) { + return false; + } + + return true; +} + +static void FoldSignExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSignExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const s64 value = inst.GetArg(0).GetImmediateAsS64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldSub(IR::Inst& inst, bool is_32_bit) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return; + } + + const auto lhs = inst.GetArg(0); + const auto rhs = inst.GetArg(1); + const auto carry = inst.GetArg(2); + + const u64 result = lhs.GetImmediateAsU64() + (~rhs.GetImmediateAsU64()) + carry.GetU1(); + ReplaceUsesWith(inst, is_32_bit, result); +} + +static void FoldZeroExtendXToWord(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{static_cast(value)}); +} + +static void FoldZeroExtendXToLong(IR::Inst& inst) { + if (!inst.AreAllArgsImmediates()) { + return; + } + + const u64 value = inst.GetArg(0).GetImmediateAsU64(); + inst.ReplaceUsesWith(IR::Value{value}); +} + +static void ConstantPropagation(IR::Block& block) { + for (auto& inst : block) { + const auto opcode = inst.GetOpcode(); + + switch (opcode) { + case Op::LeastSignificantWord: + 
FoldLeastSignificantWord(inst); + break; + case Op::MostSignificantWord: + FoldMostSignificantWord(inst); + break; + case Op::LeastSignificantHalf: + FoldLeastSignificantHalf(inst); + break; + case Op::LeastSignificantByte: + FoldLeastSignificantByte(inst); + break; + case Op::MostSignificantBit: + FoldMostSignificantBit(inst); + break; + case Op::IsZero32: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU32() == 0}); + } + break; + case Op::IsZero64: + if (inst.AreAllArgsImmediates()) { + inst.ReplaceUsesWith(IR::Value{inst.GetArg(0).GetU64() == 0}); + } + break; + case Op::LogicalShiftLeft32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftLeft(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeft64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftLeft(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::LogicalShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::LogicalShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::ArithmeticShiftRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, Safe::ArithmeticShiftRight(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight32: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU8())); + } + break; + case Op::RotateRight64: + if (FoldShifts(inst)) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), 
inst.GetArg(1).GetU8())); + } + break; + case Op::LogicalShiftLeftMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() << (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftLeftMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() << (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::LogicalShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, inst.GetArg(0).GetU32() >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::LogicalShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, inst.GetArg(0).GetU64() >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::ArithmeticShiftRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + } + break; + case Op::ArithmeticShiftRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + } + break; + case Op::RotateRightMasked32: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, true, mcl::bit::rotate_right(inst.GetArg(0).GetU32(), inst.GetArg(1).GetU32())); + } + break; + case Op::RotateRightMasked64: + if (inst.AreAllArgsImmediates()) { + ReplaceUsesWith(inst, false, mcl::bit::rotate_right(inst.GetArg(0).GetU64(), inst.GetArg(1).GetU64())); + } + break; + case Op::Add32: + case Op::Add64: + FoldAdd(inst, opcode == Op::Add32); + break; + case Op::Sub32: + case Op::Sub64: + FoldSub(inst, opcode == Op::Sub32); + break; + case Op::Mul32: + case Op::Mul64: + FoldMultiply(inst, opcode == Op::Mul32); + break; + case Op::SignedDiv32: + case Op::SignedDiv64: + FoldDivide(inst, opcode == Op::SignedDiv32, true); + break; + case Op::UnsignedDiv32: + case Op::UnsignedDiv64: + FoldDivide(inst, opcode == Op::UnsignedDiv32, false); 
+ break; + case Op::And32: + case Op::And64: + FoldAND(inst, opcode == Op::And32); + break; + case Op::Eor32: + case Op::Eor64: + FoldEOR(inst, opcode == Op::Eor32); + break; + case Op::Or32: + case Op::Or64: + FoldOR(inst, opcode == Op::Or32); + break; + case Op::Not32: + case Op::Not64: + FoldNOT(inst, opcode == Op::Not32); + break; + case Op::SignExtendByteToWord: + case Op::SignExtendHalfToWord: + FoldSignExtendXToWord(inst); + break; + case Op::SignExtendByteToLong: + case Op::SignExtendHalfToLong: + case Op::SignExtendWordToLong: + FoldSignExtendXToLong(inst); + break; + case Op::ZeroExtendByteToWord: + case Op::ZeroExtendHalfToWord: + FoldZeroExtendXToWord(inst); + break; + case Op::ZeroExtendByteToLong: + case Op::ZeroExtendHalfToLong: + case Op::ZeroExtendWordToLong: + FoldZeroExtendXToLong(inst); + break; + case Op::ByteReverseWord: + case Op::ByteReverseHalf: + case Op::ByteReverseDual: + FoldByteReverse(inst, opcode); + break; + default: + break; + } + } +} + +static void DeadCodeElimination(IR::Block& block) { + // We iterate over the instructions in reverse order. + // This is because removing an instruction reduces the number of uses for earlier instructions. 
+ for (auto& inst : mcl::iterator::reverse(block)) { + if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { + inst.Invalidate(); + } + } +} + +static void IdentityRemovalPass(IR::Block& block) { + boost::container::small_vector to_invalidate; + + auto iter = block.begin(); + while (iter != block.end()) { + IR::Inst& inst = *iter; + + const size_t num_args = inst.NumArgs(); + for (size_t i = 0; i < num_args; i++) { + while (true) { + IR::Value arg = inst.GetArg(i); + if (!arg.IsIdentity()) + break; + inst.SetArg(i, arg.GetInst()->GetArg(0)); + } + } + + if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) { + iter = block.Instructions().erase(inst); + to_invalidate.push_back(&inst); + } else { + ++iter; + } + } + for (IR::Inst* inst : to_invalidate) { + inst->Invalidate(); + } +} + +static void NamingPass(IR::Block& block) { + u32 name = 1; + for (auto& inst : block) + inst.SetName(name++); +} + +static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + + const IR::U128 t = ir.VectorExtract(x, y, 32); + + IR::U128 result = ir.ZeroVector(); + for (size_t i = 0; i < 4; i++) { + const IR::U32 modified_element = [&] { + const IR::U32 element = ir.VectorGetElement(32, t, i); + const IR::U32 tmp1 = ir.RotateRight(element, ir.Imm8(7)); + const IR::U32 tmp2 = ir.RotateRight(element, ir.Imm8(18)); + const IR::U32 tmp3 = ir.LogicalShiftRight(element, ir.Imm8(3)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); + }(); + + result = ir.VectorSetElement(32, result, i, modified_element); + } + result = ir.VectorAdd(32, result, x); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) { + const IR::U128 x = (IR::U128)inst.GetArg(0); + const IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 z = (IR::U128)inst.GetArg(2); + + const IR::U128 T0 = 
ir.VectorExtract(y, z, 32); + + const IR::U128 lower_half = [&] { + const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64); + const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, T, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, T, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, x, T0)); + return ir.VectorZeroUpper(tmp5); + }(); + + const IR::U64 upper_half = [&] { + const IR::U128 tmp1 = ir.VectorRotateRight(32, lower_half, 17); + const IR::U128 tmp2 = ir.VectorRotateRight(32, lower_half, 19); + const IR::U128 tmp3 = ir.VectorLogicalShiftRight(32, lower_half, 10); + const IR::U128 tmp4 = ir.VectorEor(tmp1, ir.VectorEor(tmp2, tmp3)); + + // Shuffle the top two 32-bit elements downwards [3, 2, 1, 0] -> [1, 0, 3, 2] + const IR::U128 shuffled_d = ir.VectorRotateWholeVectorRight(x, 64); + const IR::U128 shuffled_T0 = ir.VectorRotateWholeVectorRight(T0, 64); + + const IR::U128 tmp5 = ir.VectorAdd(32, tmp4, ir.VectorAdd(32, shuffled_d, shuffled_T0)); + return ir.VectorGetElement(64, tmp5, 0); + }(); + + const IR::U128 result = ir.VectorSetElement(64, lower_half, 1, upper_half); + + inst.ReplaceUsesWith(result); +} + +static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Eor(ir.And(ir.Eor(y, z), x), z); +} + +static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) { + return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z)); +} + +static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2)); + const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) { + const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6)); + const IR::U32 tmp2 = ir.RotateRight(x, 
ir.Imm8(11)); + const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25)); + + return ir.Eor(tmp1, ir.Eor(tmp2, tmp3)); +} + +static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 x = (IR::U128)inst.GetArg(0); + IR::U128 y = (IR::U128)inst.GetArg(1); + const IR::U128 w = (IR::U128)inst.GetArg(2); + const bool part1 = inst.GetArg(3).GetU1(); + + for (size_t i = 0; i < 4; i++) { + const IR::U32 low_x = ir.VectorGetElement(32, x, 0); + const IR::U32 after_low_x = ir.VectorGetElement(32, x, 1); + const IR::U32 before_high_x = ir.VectorGetElement(32, x, 2); + const IR::U32 high_x = ir.VectorGetElement(32, x, 3); + + const IR::U32 low_y = ir.VectorGetElement(32, y, 0); + const IR::U32 after_low_y = ir.VectorGetElement(32, y, 1); + const IR::U32 before_high_y = ir.VectorGetElement(32, y, 2); + const IR::U32 high_y = ir.VectorGetElement(32, y, 3); + + const IR::U32 choice = SHAchoose(ir, low_y, after_low_y, before_high_y); + const IR::U32 majority = SHAmajority(ir, low_x, after_low_x, before_high_x); + + const IR::U32 t = [&] { + const IR::U32 w_element = ir.VectorGetElement(32, w, i); + const IR::U32 sig = SHAhashSIGMA1(ir, low_y); + + return ir.Add(high_y, ir.Add(sig, ir.Add(choice, w_element))); + }(); + + const IR::U32 new_low_x = ir.Add(t, ir.Add(SHAhashSIGMA0(ir, low_x), majority)); + const IR::U32 new_low_y = ir.Add(t, high_x); + + // Shuffle all words left by 1 element: [3, 2, 1, 0] -> [2, 1, 0, 3] + const IR::U128 shuffled_x = ir.VectorRotateWholeVectorRight(x, 96); + const IR::U128 shuffled_y = ir.VectorRotateWholeVectorRight(y, 96); + + x = ir.VectorSetElement(32, shuffled_x, 0, new_low_x); + y = ir.VectorSetElement(32, shuffled_y, 0, new_low_y); + } + + inst.ReplaceUsesWith(part1 ? x : y); +} + +template +static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) { + IR::U128 n = (IR::U128)inst.GetArg(0); + IR::U128 m = (IR::U128)inst.GetArg(1); + + const IR::U128 wide_n = is_signed ? 
ir.VectorSignExtend(esize, n) : ir.VectorZeroExtend(esize, n); + const IR::U128 wide_m = is_signed ? ir.VectorSignExtend(esize, m) : ir.VectorZeroExtend(esize, m); + + const IR::U128 result = ir.VectorMultiply(esize * 2, wide_n, wide_m); + + inst.ReplaceUsesWith(result); +} + +static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) { + if (polyfill == PolyfillOptions{}) { + return; + } + + IR::IREmitter ir{block}; + + for (auto& inst : block) { + ir.SetInsertionPointBefore(&inst); + + switch (inst.GetOpcode()) { + case IR::Opcode::SHA256MessageSchedule0: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule0(ir, inst); + } + break; + case IR::Opcode::SHA256MessageSchedule1: + if (polyfill.sha256) { + PolyfillSHA256MessageSchedule1(ir, inst); + } + break; + case IR::Opcode::SHA256Hash: + if (polyfill.sha256) { + PolyfillSHA256Hash(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplySignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, true>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden8: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<8, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden16: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<16, false>(ir, inst); + } + break; + case IR::Opcode::VectorMultiplyUnsignedWiden32: + if (polyfill.vector_multiply_widen) { + PolyfillVectorMultiplyWiden<32, false>(ir, inst); + } + break; + default: + break; + } + } +} + +static void VerificationPass(const IR::Block& block) { + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) { + const IR::Type t1 = 
inst.GetArg(i).GetType(); + const IR::Type t2 = IR::GetArgTypeOf(inst.GetOpcode(), i); + ASSERT_MSG(IR::AreTypesCompatible(t1, t2), "Block failed:\n{}", IR::DumpBlock(block)); + } + } + ankerl::unordered_dense::map actual_uses; + for (auto const& inst : block) { + for (size_t i = 0; i < inst.NumArgs(); i++) + if (IR::Value const arg = inst.GetArg(i); !arg.IsImmediate()) + actual_uses[arg.GetInst()]++; + } + for (auto const& pair : actual_uses) + ASSERT(pair.first->UseCount() == pair.second); +} + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) [[likely]] { + Optimization::A32GetSetElimination(block, {.convert_nzc_to_nz = true}); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + Optimization::IdentityRemovalPass(block); + Optimization::VerificationPass(block); +} + +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { + Optimization::PolyfillPass(block, polyfill_options); + Optimization::A64CallbackConfigPass(block, conf); + Optimization::NamingPass(block); + if (conf.HasOptimization(OptimizationFlag::GetSetElimination) && !conf.check_halt_on_memory_access) [[likely]] { + Optimization::A64GetSetElimination(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { + Optimization::ConstantPropagation(block); + Optimization::DeadCodeElimination(block); + } + if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { + Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); + } 
+ Optimization::VerificationPass(block); +} + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h new file mode 100644 index 0000000000..1963fad0a0 --- /dev/null +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -0,0 +1,34 @@ +/* This file is part of the dynarmic project. + * Copyright (c) 2016 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#pragma once + +namespace Dynarmic::A32 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::A64 { +struct UserCallbacks; +struct UserConfig; +} + +namespace Dynarmic::IR { +class Block; +} + +namespace Dynarmic::Optimization { + +struct PolyfillOptions { + bool sha256 = false; + bool vector_multiply_widen = false; + + bool operator==(const PolyfillOptions&) const = default; +}; + +void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); +void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options); + +} // namespace Dynarmic::Optimization diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index 4d14141bbf..ad01e5718b 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -29,7 +29,7 @@ #include "dynarmic/frontend/A32/translate/a32_translate.h" #include "dynarmic/interface/A32/a32.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; @@ -179,13 +179,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 85d86c7966..29a8f23478 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -6,33 +6,24 @@ add_executable(dynarmic_tests fp/mantissa_util_tests.cpp fp/unpacked_tests.cpp rand_int.h + # A32 + 
A32/test_arm_disassembler.cpp + A32/test_arm_instructions.cpp + A32/test_coprocessor.cpp + A32/test_svc.cpp + A32/test_thumb_instructions.cpp + A32/testenv.h + decoder_tests.cpp + # A64 + A64/a64.cpp + A64/fibonacci.cpp + A64/fp_min_max.cpp + A64/misaligned_page_table.cpp + A64/test_invalidation.cpp + A64/real_world.cpp + A64/testenv.h ) - -if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/test_arm_disassembler.cpp - A32/test_arm_instructions.cpp - A32/test_coprocessor.cpp - A32/test_svc.cpp - A32/test_thumb_instructions.cpp - A32/testenv.h - decoder_tests.cpp - ) -endif() - -if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) - - target_sources(dynarmic_tests PRIVATE - A64/a64.cpp - A64/fibonacci.cpp - A64/fp_min_max.cpp - A64/misaligned_page_table.cpp - A64/test_invalidation.cpp - A64/real_world.cpp - A64/testenv.h - ) -endif() +target_link_libraries(dynarmic_tests PRIVATE merry::oaknut) if (DYNARMIC_TESTS_USE_UNICORN) target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn) @@ -40,25 +31,17 @@ if (DYNARMIC_TESTS_USE_UNICORN) target_sources(dynarmic_tests PRIVATE fuzz_util.cpp fuzz_util.h + # A32 + A32/fuzz_arm.cpp + A32/fuzz_thumb.cpp + unicorn_emu/a32_unicorn.cpp + unicorn_emu/a32_unicorn.h + # A64 + A64/fuzz_with_unicorn.cpp + A64/verify_unicorn.cpp + unicorn_emu/a64_unicorn.cpp + unicorn_emu/a64_unicorn.h ) - - if ("A32" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A32/fuzz_arm.cpp - A32/fuzz_thumb.cpp - unicorn_emu/a32_unicorn.cpp - unicorn_emu/a32_unicorn.h - ) - endif() - - if ("A64" IN_LIST DYNARMIC_FRONTENDS) - target_sources(dynarmic_tests PRIVATE - A64/fuzz_with_unicorn.cpp - A64/verify_unicorn.cpp - unicorn_emu/a64_unicorn.cpp - unicorn_emu/a64_unicorn.h - ) - endif() endif() if ("riscv" IN_LIST ARCHITECTURE) @@ -69,9 +52,6 @@ if ("x86_64" IN_LIST ARCHITECTURE) target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak) 
target_architecture_specific_sources(dynarmic_tests "x86_64" x64_cpu_info.cpp - ) - - target_architecture_specific_sources(dynarmic_tests "x86_64" native/preserve_xmm.cpp ) @@ -85,47 +65,47 @@ endif() include(CreateDirectoryGroups) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_print_info - print_info.cpp - ) +# +# dynarmic_print_info +# +add_executable(dynarmic_print_info + print_info.cpp +) +create_target_directory_groups(dynarmic_print_info) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_print_info PRIVATE . ../src) +target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_print_info) +# +# dynarmic_test_generator +# +add_executable(dynarmic_test_generator + fuzz_util.cpp + fuzz_util.h + test_generator.cpp +) - target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_print_info PRIVATE . ../src) - target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +create_target_directory_groups(dynarmic_test_generator) -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_generator - fuzz_util.cpp - fuzz_util.h - test_generator.cpp - ) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_generator PRIVATE . 
../src) +target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) - create_target_directory_groups(dynarmic_test_generator) - - target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_generator PRIVATE . ../src) - target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() - -if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS)) - add_executable(dynarmic_test_reader - test_reader.cpp - ) - - create_target_directory_groups(dynarmic_test_reader) - - target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) - target_include_directories(dynarmic_test_reader PRIVATE . ../src) - target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) - target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) -endif() +# +# dynarmic_test_reader +# +add_executable(dynarmic_test_reader + test_reader.cpp +) +create_target_directory_groups(dynarmic_test_reader) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_include_directories(dynarmic_test_reader PRIVATE . 
../src) +target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) +target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) +# create_target_directory_groups(dynarmic_tests) target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index ef8b87bbd1..3263ca729a 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -34,7 +34,7 @@ #include "dynarmic/interface/A32/a32.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" -#include "dynarmic/ir/opt/passes.h" +#include "dynarmic/ir/opt_passes.h" using namespace Dynarmic; From 7bf1ef2a813ba61f66aa2f60906d4c6e1f1d8176 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 30 Aug 2025 05:57:48 +0000 Subject: [PATCH 27/47] Fix license headers --- src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp | 3 +++ src/dynarmic/src/dynarmic/ir/opt_passes.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp index 8e48fa3687..01af361b27 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp index 2b50ad9ea3..c4c1c42792 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.h b/src/dynarmic/src/dynarmic/ir/opt_passes.h index 1963fad0a0..88b8020031 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.h +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD From bfc1a878dcba152d6b0a2e52084d56aaf935d362 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 00:11:40 +0000 Subject: [PATCH 28/47] [dynarmic] use small vector experiment Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index bfacdcca52..66c75aead9 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -12,6 +12,7 @@ #include #include +#include "boost/container/small_vector.hpp" #include "dynarmic/common/common_types.h" #include #include @@ -83,7 +84,7 @@ public: void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; private: //non trivial - std::vector values; //24 + boost::container::small_vector values; //24 // Block state uint16_t total_uses = 0; //8 //sometimes zeroed @@ -96,7 +97,7 @@ private: uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - alignas(16) uint8_t lru_counter = 0; //1 + uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 111b8a685f1d6e1aaaba11fcbf0687dbeb476c3a Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:07 +0000 Subject: [PATCH 29/47] [dynarmic] reduce use 2 bits for LRU and 4 bits for clog2 of bit size Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 29eab7908b..919e75b77b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ 
b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -118,7 +118,7 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { values.push_back(inst); ASSERT(size_t(total_uses) + inst->UseCount() < (std::numeric_limits::max)()); total_uses += inst->UseCount(); - max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType())); + max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h index 66c75aead9..2bd274730a 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h @@ -78,7 +78,7 @@ public: return std::find(values.begin(), values.end(), inst) != values.end(); } inline size_t GetMaxBitWidth() const noexcept { - return max_bit_width; + return 1 << max_bit_width; } void AddValue(IR::Inst* inst) noexcept; void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept; @@ -94,10 +94,10 @@ private: uint16_t is_being_used_count = 0; //8 uint16_t current_references = 0; //8 // Value state - uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128 + uint8_t max_bit_width : 4 = 0; //Valid values: log2(1,2,4,8,16,32,128) = (0, 1, 2, 3, 4, 5, 6) + uint8_t lru_counter : 2 = 0; //1 bool is_scratch : 1 = false; //1 bool is_set_last_use : 1 = false; //1 - uint8_t lru_counter = 0; //1 friend class RegAlloc; }; static_assert(sizeof(HostLocInfo) == 64); From 11683cb62ffe05552525b361309406c2dd359ed3 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 06:40:22 +0000 Subject: [PATCH 30/47] [dynarmic] fix tests_reader and tests_generator Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 2 +- src/dynarmic/tests/A32/testenv.h | 1 - src/dynarmic/tests/A64/testenv.h | 1 - src/dynarmic/tests/CMakeLists.txt | 28 
++++++++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index ee06ccf19a..d6be793dc6 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -375,7 +375,7 @@ target_link_libraries(dynarmic ) if (BOOST_NO_HEADERS) -target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) + target_link_libraries(dynarmic PRIVATE Boost::variant Boost::icl Boost::pool) else() target_link_libraries(dynarmic PRIVATE Boost::headers) endif() diff --git a/src/dynarmic/tests/A32/testenv.h b/src/dynarmic/tests/A32/testenv.h index a6df2017ce..72eaafce14 100644 --- a/src/dynarmic/tests/A32/testenv.h +++ b/src/dynarmic/tests/A32/testenv.h @@ -17,7 +17,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A32/a32.h" -#include "../native/testenv.h" template class A32TestEnv : public Dynarmic::A32::UserCallbacks { diff --git a/src/dynarmic/tests/A64/testenv.h b/src/dynarmic/tests/A64/testenv.h index 31e338b138..fcdadb23e6 100644 --- a/src/dynarmic/tests/A64/testenv.h +++ b/src/dynarmic/tests/A64/testenv.h @@ -12,7 +12,6 @@ #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" #include "dynarmic/interface/A64/a64.h" -#include "../native/testenv.h" using Vector = Dynarmic::A64::Vector; diff --git a/src/dynarmic/tests/CMakeLists.txt b/src/dynarmic/tests/CMakeLists.txt index 29a8f23478..f8f420e2d6 100644 --- a/src/dynarmic/tests/CMakeLists.txt +++ b/src/dynarmic/tests/CMakeLists.txt @@ -72,7 +72,12 @@ add_executable(dynarmic_print_info print_info.cpp ) create_target_directory_groups(dynarmic_print_info) -target_link_libraries(dynarmic_print_info PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_print_info PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + 
target_link_libraries(dynarmic_print_info PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_print_info PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_print_info PRIVATE . ../src) target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -88,7 +93,12 @@ add_executable(dynarmic_test_generator create_target_directory_groups(dynarmic_test_generator) -target_link_libraries(dynarmic_test_generator PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_generator PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_generator PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_generator PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_generator PRIVATE . ../src) target_compile_options(dynarmic_test_generator PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_generator PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -100,7 +110,12 @@ add_executable(dynarmic_test_reader test_reader.cpp ) create_target_directory_groups(dynarmic_test_reader) -target_link_libraries(dynarmic_test_reader PRIVATE dynarmic Boost::headers fmt::fmt merry::mcl) +target_link_libraries(dynarmic_test_reader PRIVATE dynarmic fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_test_reader PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_test_reader PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_test_reader PRIVATE . 
../src) target_compile_options(dynarmic_test_reader PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) @@ -108,7 +123,12 @@ target_compile_definitions(dynarmic_test_reader PRIVATE FMT_USE_USER_DEFINED_LIT # create_target_directory_groups(dynarmic_tests) -target_link_libraries(dynarmic_tests PRIVATE dynarmic Boost::headers Catch2::Catch2WithMain fmt::fmt merry::mcl) +target_link_libraries(dynarmic_tests PRIVATE dynarmic Catch2::Catch2WithMain fmt::fmt merry::mcl) +if (BOOST_NO_HEADERS) + target_link_libraries(dynarmic_tests PRIVATE Boost::variant Boost::icl Boost::pool) +else() + target_link_libraries(dynarmic_tests PRIVATE Boost::headers) +endif() target_include_directories(dynarmic_tests PRIVATE . ../src) target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS}) target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1) From c1852e2ec95b3fa702261291152e2b767dca0702 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 09:10:18 +0000 Subject: [PATCH 31/47] [docs] fastmem draft Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 5 +++++ src/dynarmic/docs/Fastmem.svg | 4 ++++ src/dynarmic/docs/HostToGuest.svg | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 src/dynarmic/docs/FastMemory.md create mode 100644 src/dynarmic/docs/Fastmem.svg create mode 100644 src/dynarmic/docs/HostToGuest.svg diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md new file mode 100644 index 0000000000..2a7c1a2583 --- /dev/null +++ b/src/dynarmic/docs/FastMemory.md @@ -0,0 +1,5 @@ +# Fast memory (Fastmem) + +![Host to guest translation](./HostToGuest.svg) + +![Fastmem translation](./Fastmem.svg) diff --git a/src/dynarmic/docs/Fastmem.svg b/src/dynarmic/docs/Fastmem.svg new file mode 100644 index 0000000000..a3ed0bb68b --- /dev/null +++ b/src/dynarmic/docs/Fastmem.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
SIGSEGV Trap
Fastmem
Only needs to linearly offset from fastmem arena
Less codegen (SIGSEGV traps)
Is fast only if SIGSEGV handlers are sufficiently fast
\ No newline at end of file diff --git a/src/dynarmic/docs/HostToGuest.svg b/src/dynarmic/docs/HostToGuest.svg new file mode 100644 index 0000000000..6a15a44b46 --- /dev/null +++ b/src/dynarmic/docs/HostToGuest.svg @@ -0,0 +1,4 @@ + + + +
Emulator
Address Space
Guest Address Space
Resolver
Host to Guest translation
Looks up correct PTE
Translates each address 
Is slow
\ No newline at end of file From 351f9ac097ff0948b67bba0afc15ca86c3f37152 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 3 Sep 2025 10:16:22 +0000 Subject: [PATCH 32/47] [dynarmic] (prolly makes MSVC crash) - use 128MiB code cache Signed-off-by: lizzie --- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 6 +- src/core/arm/dynarmic/arm_dynarmic_64.cpp | 6 +- .../frontend/decoder/decoder_detail.h | 65 ++++++++----------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 2c2c54a1ad..3f7bf7f967 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -210,12 +210,8 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 438b7b691c..682b0cfbf6 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -269,12 +269,8 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa config.wall_clock_cntpct = m_uses_wall_clock; config.enable_cycle_counting = !m_uses_wall_clock; - // Code cache size -#ifdef ARCHITECTURE_arm64 + // Code cache size - max in ARM is 128MiB, max in x86_64 is 2GiB config.code_cache_size = std::uint32_t(128_MiB); -#else - config.code_cache_size = std::uint32_t(512_MiB); -#endif // Allow memory fault handling to work if (m_system.DebuggerEnabled()) { diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h 
b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index bd76efda2a..4050033ef5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -70,11 +70,9 @@ struct detail { return std::make_tuple(mask, expect); } - /** - * Generates the masks and shifts for each argument. - * A '-' in a bitstring indicates that we don't care about that value. - * An argument is specified by a continuous string of the same character. - */ + /// @brief Generates the masks and shifts for each argument. + /// A '-' in a bitstring indicates that we don't care about that value. + /// An argument is specified by a continuous string of the same character. template static consteval auto GetArgInfo(std::array bitstring) { std::array masks = {}; @@ -98,7 +96,6 @@ struct detail { if constexpr (N > 0) { const size_t bit_position = opcode_bitsize - i - 1; - if (arg_index >= N) throw std::out_of_range("Unexpected field"); @@ -109,20 +106,16 @@ struct detail { } } } - #if !defined(DYNARMIC_IGNORE_ASSERTS) && !defined(__ANDROID__) // Avoids a MSVC ICE, and avoids Android NDK issue. ASSERT(std::all_of(masks.begin(), masks.end(), [](auto m) { return m != 0; })); #endif - return std::make_tuple(masks, shifts); } - /** - * This struct's Make member function generates a lambda which decodes an instruction based on - * the provided arg_masks and arg_shifts. The Visitor member function to call is provided as a - * template argument. - */ + /// @brief This struct's Make member function generates a lambda which decodes an instruction + /// based on the provided arg_masks and arg_shifts. The Visitor member function to call is + /// provided as a template argument. 
template struct VisitorCaller; @@ -130,36 +123,36 @@ struct detail { # pragma warning(push) # pragma warning(disable : 4800) // forcing value to bool 'true' or 'false' (performance warning) #endif - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...), + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...), const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; - template - struct VisitorCaller { + template + struct VisitorCaller { template - static auto Make(std::integer_sequence, - CallRetT (Visitor::*const fn)(Args...) const, + static constexpr auto Make(std::integer_sequence, + ReturnType (V::*const fn)(Args...) 
const, const std::array arg_masks, const std::array arg_shifts) { - static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); - return [fn, arg_masks, arg_shifts](const Visitor& v, opcode_type instruction) { + static_assert(std::is_same_v, "Member function is not from Matcher's Visitor"); + return [fn, arg_masks, arg_shifts](const V& v, opcode_type instruction) { (void)instruction; (void)arg_masks; (void)arg_shifts; - return (v.*fn)(static_cast((instruction & arg_masks[iota]) >> arg_shifts[iota])...); + return (v.*fn)(Args((instruction & arg_masks[iota]) >> arg_shifts[iota])...); }; } }; @@ -167,22 +160,16 @@ struct detail { # pragma warning(pop) #endif - /** - * Creates a matcher that can match and parse instructions based on bitstring. - * See also: GetMaskAndExpect and GetArgInfo for format of bitstring. - */ - template - static auto GetMatcher(FnT fn, const char* const name) { - constexpr size_t args_count = mcl::parameter_count_v; - + /// @brief Creates a matcher that can match and parse instructions based on bitstring. + /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
+ template + static constexpr auto GetMatcher(F fn, const char* const name) { + constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); - - using Iota = std::make_index_sequence; - - const auto proxy_fn = VisitorCaller::Make(Iota(), fn, arg_masks, arg_shifts); + const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); return MatcherT(name, mask, expect, proxy_fn); } }; From 2665d6819d165c29e4bb8e0afa3bcf6d5affbb99 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 4 Sep 2025 07:24:49 +0000 Subject: [PATCH 33/47] [dynarmic] reduce matcher table noise and cache misses Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/a32_types.h | 22 +++++---------- .../src/dynarmic/frontend/A32/decoder/asimd.h | 27 +++++++++---------- .../src/dynarmic/frontend/A32/decoder/vfp.h | 12 +++------ .../src/dynarmic/frontend/A64/decoder/a64.h | 23 +++++++--------- .../frontend/decoder/decoder_detail.h | 6 ++--- .../src/dynarmic/frontend/decoder/matcher.h | 11 ++------ 6 files changed, 38 insertions(+), 63 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h index 3f4501a528..2a0cc25751 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/a32_types.h @@ -9,12 +9,9 @@ #pragma once #include -#include - #include #include "dynarmic/common/assert.h" #include "dynarmic/common/common_types.h" - #include "dynarmic/interface/A32/coprocessor_util.h" #include "dynarmic/ir/cond.h" @@ -89,24 +86,17 @@ constexpr bool IsQuadExtReg(ExtReg reg) { inline size_t RegNumber(Reg reg) { ASSERT(reg != Reg::INVALID_REG); - return static_cast(reg); + return size_t(reg); } inline size_t 
RegNumber(ExtReg reg) { if (IsSingleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::S0); + return size_t(reg) - size_t(ExtReg::S0); + } else if (IsDoubleExtReg(reg)) { + return size_t(reg) - size_t(ExtReg::D0); } - - if (IsDoubleExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::D0); - } - - if (IsQuadExtReg(reg)) { - return static_cast(reg) - static_cast(ExtReg::Q0); - } - - ASSERT_MSG(false, "Invalid extended register"); - return 0; + ASSERT(IsQuadExtReg(reg)); + return size_t(reg) - size_t(ExtReg::Q0); } inline Reg operator+(Reg reg, size_t number) { diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f2e206695b..f63816d2b1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -27,14 +27,11 @@ using ASIMDMatcher = Decoder::Matcher; template std::vector> GetASIMDDecodeTable() { - std::vector> table = { - -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> table = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" #undef INST - }; - // Exceptions to the rule of thumb. 
const std::set comes_first{ "VBIC, VMOV, VMVN, VORR (immediate)", @@ -53,19 +50,21 @@ std::vector> GetASIMDDecodeTable() { "VQDMULH (scalar)", "VQRDMULH (scalar)", }; - const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_first.count(matcher.GetName()) > 0; + const auto sort_begin = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_first.count(e.first) > 0; }); - const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& matcher) { - return comes_last.count(matcher.GetName()) == 0; + const auto sort_end = std::stable_partition(table.begin(), table.end(), [&](const auto& e) { + return comes_last.count(e.first) == 0; }); - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(sort_begin, sort_end, [](const auto& matcher1, const auto& matcher2) { - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + std::stable_sort(sort_begin, sort_end, [](const auto& a, const auto& b) { + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - - return table; + std::vector> final_table; + std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + return e.second; + }); + return final_table; } template diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index 5fcacd2bda..a969570547 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -30,22 +30,18 @@ std::optional>> DecodeVFP(u32 instruc static const struct Tables { Table unconditional; Table conditional; - } tables = [] { + } tables = []() { Table list = { - #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include 
"./vfp.inc" #undef INST - }; - - const auto division = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + auto const it = std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { return (matcher.GetMask() & 0xF0000000) == 0xF0000000; }); - return Tables{ - Table{list.begin(), division}, - Table{division, list.end()}, + Table{list.begin(), it}, + Table{it, list.end()}, }; }(); diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c651dd7cde..c7bc981ac6 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -37,34 +37,31 @@ inline size_t ToFastLookupIndex(u32 instruction) { template constexpr DecodeTable GetDecodeTable() { - std::vector> list = { -#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./a64.inc" #undef INST }; - // If a matcher has more bits in its mask it is more specific, so it should come first. - std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) { + std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) { // If a matcher has more bits in its mask it is more specific, so it should come first. - return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask()); + return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); - // Exceptions to the above rule of thumb. 
- std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) { + std::stable_partition(list.begin(), list.end(), [&](const auto& e) { return std::set{ "MOVI, MVNI, ORR, BIC (vector, immediate)", "FMOV (vector, immediate)", "Unallocated SIMD modified immediate", - }.count(matcher.GetName()) > 0; + }.count(e.first) > 0; }); - DecodeTable table{}; for (size_t i = 0; i < table.size(); ++i) { - for (auto matcher : list) { - const auto expect = detail::ToFastLookupIndex(matcher.GetExpected()); - const auto mask = detail::ToFastLookupIndex(matcher.GetMask()); + for (auto const& e : list) { + const auto expect = detail::ToFastLookupIndex(e.second.GetExpected()); + const auto mask = detail::ToFastLookupIndex(e.second.GetMask()); if ((i & mask) == expect) { - table[i].push_back(matcher); + table[i].push_back(e.second); } } } diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h index 4050033ef5..0d9da6169c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/decoder_detail.h @@ -163,18 +163,18 @@ struct detail { /// @brief Creates a matcher that can match and parse instructions based on bitstring. /// See also: GetMaskAndExpect and GetArgInfo for format of bitstring. 
template - static constexpr auto GetMatcher(F fn, const char* const name) { + static constexpr auto GetMatcher(F fn) { constexpr size_t args_count = mcl::parameter_count_v; constexpr auto mask = std::get<0>(GetMaskAndExpect(bitstring)); constexpr auto expect = std::get<1>(GetMaskAndExpect(bitstring)); constexpr auto arg_masks = std::get<0>(GetArgInfo(bitstring)); constexpr auto arg_shifts = std::get<1>(GetArgInfo(bitstring)); const auto proxy_fn = VisitorCaller::Make(std::make_index_sequence(), fn, arg_masks, arg_shifts); - return MatcherT(name, mask, expect, proxy_fn); + return MatcherT(mask, expect, proxy_fn); } }; -#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn, name) +#define DYNARMIC_DECODER_GET_MATCHER(MatcherT, fn, name, bitstring) Decoder::detail::detail>::template GetMatcher(&V::fn) } // namespace detail } // namespace Dynarmic::Decoder diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 7e5c9c5a8f..5a2afe57c5 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -31,14 +31,8 @@ public: using visitor_type = Visitor; using handler_return_type = typename Visitor::instruction_return_type; using handler_function = std::function; - - Matcher(const char* const name, opcode_type mask, opcode_type expected, handler_function func) - : name{name}, mask{mask}, expected{expected}, fn{std::move(func)} {} - - /// Gets the name of this type of instruction. - const char* GetName() const { - return name; - } + Matcher(opcode_type mask, opcode_type expected, handler_function func) + : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. 
opcode_type GetMask() const { @@ -70,7 +64,6 @@ public: } private: - const char* name; opcode_type mask; opcode_type expected; handler_function fn; From 1fcc91d741023870f392ec790bff3997798ddd3f Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 20:59:23 +0000 Subject: [PATCH 34/47] [dynarmic] fix tests Signed-off-by: lizzie --- .../src/dynarmic/frontend/decoder/matcher.h | 40 +++++++----------- src/dynarmic/tests/A32/fuzz_arm.cpp | 1 + src/dynarmic/tests/A32/fuzz_thumb.cpp | 1 + .../tests/A32/test_arm_instructions.cpp | 1 + src/dynarmic/tests/A32/test_coprocessor.cpp | 1 + src/dynarmic/tests/A32/test_svc.cpp | 1 + .../tests/A32/test_thumb_instructions.cpp | 1 + src/dynarmic/tests/A64/a64.cpp | 1 + src/dynarmic/tests/A64/fp_min_max.cpp | 1 + src/dynarmic/tests/A64/fuzz_with_unicorn.cpp | 1 + .../tests/A64/misaligned_page_table.cpp | 1 + src/dynarmic/tests/A64/real_world.cpp | 1 + src/dynarmic/tests/A64/test_invalidation.cpp | 1 + src/dynarmic/tests/decoder_tests.cpp | 2 + src/dynarmic/tests/native/preserve_xmm.cpp | 1 + src/dynarmic/tests/print_info.cpp | 42 ++++--------------- 16 files changed, 39 insertions(+), 58 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h index 5a2afe57c5..f7e2884e0c 100644 --- a/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h +++ b/src/dynarmic/src/dynarmic/frontend/decoder/matcher.h @@ -14,16 +14,12 @@ namespace Dynarmic::Decoder { -/** - * Generic instruction handling construct. - * - * @tparam Visitor An arbitrary visitor type that will be passed through - * to the function being handled. This type must be the - * type of the first parameter in a handler function. - * - * @tparam OpcodeType Type representing an opcode. This must be the - * type of the second parameter in a handler function. - */ +/// Generic instruction handling construct. 
+/// @tparam Visitor An arbitrary visitor type that will be passed through +/// to the function being handled. This type must be the +/// type of the first parameter in a handler function. +/// @tparam OpcodeType Type representing an opcode. This must be the +/// type of the second parameter in a handler function. template class Matcher { public: @@ -35,30 +31,26 @@ public: : mask{mask}, expected{expected}, fn{std::move(func)} {} /// Gets the mask for this instruction. - opcode_type GetMask() const { + inline opcode_type GetMask() const noexcept { return mask; } /// Gets the expected value after masking for this instruction. - opcode_type GetExpected() const { + inline opcode_type GetExpected() const noexcept { return expected; } - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - bool Matches(opcode_type instruction) const { + /// Tests to see if the given instruction is the instruction this matcher represents. + /// @param instruction The instruction to test + /// @returns true if the given instruction matches. + inline bool Matches(opcode_type instruction) const noexcept { return (instruction & mask) == expected; } - /** - * Calls the corresponding instruction handler on visitor for this type of instruction. - * @param v The visitor to use - * @param instruction The instruction to decode. - */ - handler_return_type call(Visitor& v, opcode_type instruction) const { + /// Calls the corresponding instruction handler on visitor for this type of instruction. + /// @param v The visitor to use + /// @param instruction The instruction to decode. 
+ inline handler_return_type call(Visitor& v, opcode_type instruction) const noexcept { ASSERT(Matches(instruction)); return fn(v, instruction); } diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 087ce54813..5ee6d2bf02 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -24,6 +24,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A32/fuzz_thumb.cpp b/src/dynarmic/tests/A32/fuzz_thumb.cpp index ad01e5718b..7f64cb0ccb 100644 --- a/src/dynarmic/tests/A32/fuzz_thumb.cpp +++ b/src/dynarmic/tests/A32/fuzz_thumb.cpp @@ -22,6 +22,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a32_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/FPSCR.h" #include "dynarmic/frontend/A32/PSR.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" diff --git a/src/dynarmic/tests/A32/test_arm_instructions.cpp b/src/dynarmic/tests/A32/test_arm_instructions.cpp index 0411877823..c007a18299 100644 --- a/src/dynarmic/tests/A32/test_arm_instructions.cpp +++ b/src/dynarmic/tests/A32/test_arm_instructions.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" diff --git a/src/dynarmic/tests/A32/test_coprocessor.cpp b/src/dynarmic/tests/A32/test_coprocessor.cpp index 98da0e5d34..49cb42bdf3 100644 --- a/src/dynarmic/tests/A32/test_coprocessor.cpp +++ b/src/dynarmic/tests/A32/test_coprocessor.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/interface/A32/a32.h" #include 
"dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/tests/A32/test_svc.cpp b/src/dynarmic/tests/A32/test_svc.cpp index 8b55d6537c..998566130e 100644 --- a/src/dynarmic/tests/A32/test_svc.cpp +++ b/src/dynarmic/tests/A32/test_svc.cpp @@ -8,6 +8,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A32/test_thumb_instructions.cpp b/src/dynarmic/tests/A32/test_thumb_instructions.cpp index 3501d5419f..d509acdd8d 100644 --- a/src/dynarmic/tests/A32/test_thumb_instructions.cpp +++ b/src/dynarmic/tests/A32/test_thumb_instructions.cpp @@ -10,6 +10,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A32/a32.h" static Dynarmic::A32::UserConfig GetUserConfig(ThumbTestEnv* testenv) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index 40eff1f071..24e92d1210 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -7,6 +7,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/A64/fp_min_max.cpp b/src/dynarmic/tests/A64/fp_min_max.cpp index d8b45db807..1669b63071 100644 --- a/src/dynarmic/tests/A64/fp_min_max.cpp +++ b/src/dynarmic/tests/A64/fp_min_max.cpp @@ -12,6 +12,7 @@ #include "dynarmic/common/common_types.h" #include "./testenv.h" +#include "../native/testenv.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp index 3322af06e7..45ba728917 100644 --- a/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp +++ b/src/dynarmic/tests/A64/fuzz_with_unicorn.cpp @@ -19,6 +19,7 @@ #include "../rand_int.h" #include "../unicorn_emu/a64_unicorn.h" #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpcr.h" #include 
"dynarmic/common/fp/fpsr.h" #include "dynarmic/common/llvm_disassemble.h" diff --git a/src/dynarmic/tests/A64/misaligned_page_table.cpp b/src/dynarmic/tests/A64/misaligned_page_table.cpp index 8235e14a67..ecba5a3efa 100644 --- a/src/dynarmic/tests/A64/misaligned_page_table.cpp +++ b/src/dynarmic/tests/A64/misaligned_page_table.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_access_via_page_table is set", "[a64]") { diff --git a/src/dynarmic/tests/A64/real_world.cpp b/src/dynarmic/tests/A64/real_world.cpp index 07532d95af..a083f16d61 100644 --- a/src/dynarmic/tests/A64/real_world.cpp +++ b/src/dynarmic/tests/A64/real_world.cpp @@ -5,6 +5,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/A64/test_invalidation.cpp b/src/dynarmic/tests/A64/test_invalidation.cpp index 168043c1cb..8a63776fa4 100644 --- a/src/dynarmic/tests/A64/test_invalidation.cpp +++ b/src/dynarmic/tests/A64/test_invalidation.cpp @@ -6,6 +6,7 @@ #include #include "./testenv.h" +#include "../native/testenv.h" #include "dynarmic/interface/A64/a64.h" using namespace Dynarmic; diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index e545309960..777ea8b510 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,6 +20,7 @@ using namespace Dynarmic; +/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -76,3 +77,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { } while (x != 0); } } +*/ diff --git a/src/dynarmic/tests/native/preserve_xmm.cpp b/src/dynarmic/tests/native/preserve_xmm.cpp index 0f69697b7a..7421252063 100644 --- 
a/src/dynarmic/tests/native/preserve_xmm.cpp +++ b/src/dynarmic/tests/native/preserve_xmm.cpp @@ -6,6 +6,7 @@ #include #include "../A64/testenv.h" +#include "../native/testenv.h" #include "dynarmic/common/fp/fpsr.h" #include "dynarmic/interface/exclusive_monitor.h" diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 3263ca729a..0c89e780ad 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -32,6 +32,7 @@ #include "dynarmic/frontend/A64/translate/a64_translate.h" #include "dynarmic/frontend/A64/translate/impl/impl.h" #include "dynarmic/interface/A32/a32.h" +#include "dynarmic/interface/A32/config.h" #include "dynarmic/interface/A32/disassembler.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/opt_passes.h" @@ -39,20 +40,20 @@ using namespace Dynarmic; const char* GetNameOfA32Instruction(u32 instruction) { - if (auto vfp_decoder = A32::DecodeVFP(instruction)) { + /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { return vfp_decoder->get().GetName(); } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { return asimd_decoder->get().GetName(); } else if (auto decoder = A32::DecodeArm(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } const char* GetNameOfA64Instruction(u32 instruction) { - if (auto decoder = A64::Decode(instruction)) { + /*if (auto decoder = A64::Decode(instruction)) { return decoder->get().GetName(); - } + }*/ return ""; } @@ -64,18 +65,9 @@ void PrintA32Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - 
Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -88,18 +80,9 @@ void PrintA64Instruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A64::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A64GetSetElimination(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A64::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } @@ -115,18 +98,9 @@ void PrintThumbInstruction(u32 instruction) { IR::Block ir_block{location}; const bool should_continue = A32::TranslateSingleInstruction(ir_block, location, instruction); fmt::print("should_continue: {}\n\n", should_continue); - - Optimization::NamingPass(ir_block); - fmt::print("IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); - - Optimization::A32GetSetElimination(ir_block, {}); - Optimization::DeadCodeElimination(ir_block); - Optimization::ConstantPropagation(ir_block); - Optimization::DeadCodeElimination(ir_block); - Optimization::IdentityRemovalPass(ir_block); - + Optimization::Optimize(ir_block, A32::UserConfig{}, {}); fmt::print("Optimized IR:\n"); fmt::print("{}\n", IR::DumpBlock(ir_block)); } From fe7c8c9a794315984b0a0b447247d3961686000e Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:07:18 +0000 Subject: [PATCH 35/47] [dynarmic] fix ASIMD execution Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index f63816d2b1..2cea7a14c1 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -61,7 +61,7 @@ std::vector> GetASIMDDecodeTable() { return mcl::bit::count_ones(a.second.GetMask()) > mcl::bit::count_ones(b.second.GetMask()); }); std::vector> final_table; - std::transform(table.cbegin(), table.cend(), final_table.begin(), [](auto const& e) { + std::transform(table.cbegin(), table.cend(), std::back_inserter(final_table), [](auto const& e) { return e.second; }); return final_table; From ef312986d0b128ac5dde296f99331bd594f5f529 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 6 Sep 2025 21:40:13 +0000 Subject: [PATCH 36/47] [dynarmic] add back encoding names (for print_info) Signed-off-by: lizzie --- .../src/dynarmic/frontend/A32/decoder/arm.h | 20 ++++++++++--- .../src/dynarmic/frontend/A32/decoder/asimd.h | 26 ++++++++++++----- .../dynarmic/frontend/A32/decoder/thumb16.h | 24 +++++++++++----- .../dynarmic/frontend/A32/decoder/thumb32.h | 24 +++++++++++----- .../src/dynarmic/frontend/A32/decoder/vfp.h | 24 +++++++++++----- .../A32/translate/translate_thumb.cpp | 2 +- .../src/dynarmic/frontend/A64/decoder/a64.h | 20 ++++++++++--- src/dynarmic/tests/decoder_tests.cpp | 28 ++++++------------- src/dynarmic/tests/print_info.cpp | 22 +++++++-------- 9 files changed, 122 insertions(+), 68 deletions(-) diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h index 0257c28ddb..c6f034ae21 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h @@ -30,13 +30,13 @@ template using ArmDecodeTable = std::array>, 0x1000>; namespace detail { -inline size_t ToFastLookupIndexArm(u32 instruction) { +inline size_t ToFastLookupIndexArm(u32 
instruction) noexcept { return ((instruction >> 4) & 0x00F) | ((instruction >> 16) & 0xFF0); } } // namespace detail template -constexpr ArmDecodeTable GetArmDecodeTable() { +constexpr ArmDecodeTable GetArmDecodeTable() noexcept { std::vector> list = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./arm.inc" @@ -62,15 +62,27 @@ constexpr ArmDecodeTable GetArmDecodeTable() { } template -std::optional>> DecodeArm(u32 instruction) { +std::optional>> DecodeArm(u32 instruction) noexcept { alignas(64) static const auto table = GetArmDecodeTable(); const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)]; auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); return iter != subtable.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameARM(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./arm.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h index 2cea7a14c1..57e1392871 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/asimd.h @@ -26,7 +26,7 @@ template using ASIMDMatcher = Decoder::Matcher; template -std::vector> GetASIMDDecodeTable() { +std::vector> GetASIMDDecodeTable() noexcept { std::vector>> table = { #define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, #include "./asimd.inc" @@ -68,13 +68,25 @@ std::vector> GetASIMDDecodeTable() { } template -std::optional>> DecodeASIMD(u32 instruction) { - static const auto table = GetASIMDDecodeTable(); - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); +std::optional>> DecodeASIMD(u32 instruction) noexcept { + alignas(64) static const auto table = GetASIMDDecodeTable(); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameASIMD(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(ASIMDMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./asimd.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h index 8073ee5d47..16b99ba5aa 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb16.h @@ -25,18 +25,28 @@ using Thumb16Matcher = Decoder::Matcher; template std::optional>> DecodeThumb16(u16 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)), #include "./thumb16.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb16(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb16Matcher, fn, name, Decoder::detail::StringToArray<16>(bitstring)) }, +#include "./thumb16.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h index 86a4d767a7..19418de67c 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/thumb32.h @@ -24,18 +24,28 @@ using Thumb32Matcher = Decoder::Matcher; template std::optional>> DecodeThumb32(u32 instruction) { - static const std::vector> table = { - + alignas(64) static const std::vector> table = { #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)), #include "./thumb32.inc" #undef INST - }; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameThumb32(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Thumb32Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./thumb32.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h index a969570547..a346304a9a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h +++ b/src/dynarmic/src/dynarmic/frontend/A32/decoder/vfp.h @@ -26,8 +26,7 @@ using VFPMatcher = Decoder::Matcher; template std::optional>> DecodeVFP(u32 instruction) { using Table = std::vector>; - - static const struct Tables { + alignas(64) static const struct Tables { Table unconditional; Table conditional; } tables = []() { @@ -44,14 +43,25 @@ std::optional>> DecodeVFP(u32 instruc Table{it, list.end()}, }; }(); - const bool is_unconditional = (instruction & 0xF0000000) == 0xF0000000; const Table& table = is_unconditional ? tables.unconditional : tables.conditional; - - const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); + auto iter = std::find_if(table.begin(), table.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != table.end() ? std::optional>>(*iter) : std::nullopt; } +template +std::optional GetNameVFP(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(VFPMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./vfp.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? 
std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A32 diff --git a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp index 5bb516ccfd..0381c984cc 100644 --- a/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A32/translate/translate_thumb.cpp @@ -97,7 +97,7 @@ u32 ConvertASIMDInstruction(u32 thumb_instruction) { return 0xF7F0A000; // UDF } -bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) { +inline bool MaybeVFPOrASIMDInstruction(u32 thumb_instruction) noexcept { return (thumb_instruction & 0xEC000000) == 0xEC000000 || (thumb_instruction & 0xFF100000) == 0xF9000000; } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h index c7bc981ac6..7e6cdc3935 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h +++ b/src/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h @@ -71,12 +71,24 @@ constexpr DecodeTable GetDecodeTable() { template std::optional>> Decode(u32 instruction) { alignas(64) static const auto table = GetDecodeTable(); - const auto matches_instruction = [instruction](const auto& matcher) { - return matcher.Matches(instruction); - }; const auto& subtable = table[detail::ToFastLookupIndex(instruction)]; - auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction); + auto iter = std::find_if(subtable.begin(), subtable.end(), [instruction](const auto& matcher) { + return matcher.Matches(instruction); + }); return iter != subtable.end() ? 
std::optional>>(*iter) : std::nullopt; } +template +std::optional GetName(u32 inst) noexcept { + std::vector>> list = { +#define INST(fn, name, bitstring) { name, DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)) }, +#include "./a64.inc" +#undef INST + }; + auto const iter = std::find_if(list.cbegin(), list.cend(), [inst](auto const& m) { + return m.second.Matches(inst); + }); + return iter != list.cend() ? std::optional{iter->first} : std::nullopt; +} + } // namespace Dynarmic::A64 diff --git a/src/dynarmic/tests/decoder_tests.cpp b/src/dynarmic/tests/decoder_tests.cpp index 777ea8b510..4ad9d90833 100644 --- a/src/dynarmic/tests/decoder_tests.cpp +++ b/src/dynarmic/tests/decoder_tests.cpp @@ -20,7 +20,6 @@ using namespace Dynarmic; -/* TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto table = A32::GetASIMDDecodeTable(); @@ -37,22 +36,12 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const auto is_decode_error = [&get_ir](const A32::ASIMDMatcher& matcher, u32 instruction) { const auto block = get_ir(matcher, instruction); - - for (const auto& ir_inst : block) { - if (ir_inst.GetOpcode() == IR::Opcode::A32ExceptionRaised) { - if (static_cast(ir_inst.GetArg(1).GetU64()) == A32::Exception::DecodeError) { - return true; - } - } - } - return false; + return std::find_if(block.cbegin(), block.cend(), [](auto const& e) { + return e.GetOpcode() == IR::Opcode::A32ExceptionRaised && A32::Exception(e.GetArg(1).GetU64()) == A32::Exception::DecodeError; + }) != block.cend(); }; for (auto iter = table.cbegin(); iter != table.cend(); ++iter) { - if (std::strncmp(iter->GetName(), "UNALLOCATED", 11) == 0) { - continue; - } - const u32 expect = iter->GetExpected(); const u32 mask = iter->GetMask(); u32 x = 0; @@ -60,15 +49,17 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { const u32 instruction = expect | x; const bool iserr 
= is_decode_error(*iter, instruction); - const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { return m.Matches(instruction); }); + const auto alternative = std::find_if(table.cbegin(), iter, [instruction](const auto& m) { + return m.Matches(instruction); + }); const bool altiserr = is_decode_error(*alternative, instruction); INFO("Instruction: " << std::hex << std::setfill('0') << std::setw(8) << instruction); INFO("Expect: " << std::hex << std::setfill('0') << std::setw(8) << expect); INFO("Fill: " << std::hex << std::setfill('0') << std::setw(8) << x); - INFO("Name: " << iter->GetName()); + INFO("Name: " << *A32::GetNameASIMD(instruction)); INFO("iserr: " << iserr); - INFO("alternative: " << alternative->GetName()); + //INFO("alternative: " << alternative->GetName()); INFO("altiserr: " << altiserr); REQUIRE(((!iserr && alternative == iter) || (iserr && alternative != iter && !altiserr))); @@ -76,5 +67,4 @@ TEST_CASE("ASIMD Decoder: Ensure table order correctness", "[decode][a32][.]") { x = ((x | mask) + 1) & ~mask; } while (x != 0); } -} -*/ +} \ No newline at end of file diff --git a/src/dynarmic/tests/print_info.cpp b/src/dynarmic/tests/print_info.cpp index 0c89e780ad..8936f32bd3 100644 --- a/src/dynarmic/tests/print_info.cpp +++ b/src/dynarmic/tests/print_info.cpp @@ -39,21 +39,19 @@ using namespace Dynarmic; -const char* GetNameOfA32Instruction(u32 instruction) { - /*if (auto vfp_decoder = A32::DecodeVFP(instruction)) { - return vfp_decoder->get().GetName(); - } else if (auto asimd_decoder = A32::DecodeASIMD(instruction)) { - return asimd_decoder->get().GetName(); - } else if (auto decoder = A32::DecodeArm(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA32Instruction(u32 instruction) { + if (auto const vfp_decoder = A32::DecodeVFP(instruction)) + return *A32::GetNameVFP(instruction); + else if (auto const asimd_decoder = A32::DecodeASIMD(instruction)) + return 
*A32::GetNameASIMD(instruction); + else if (auto const decoder = A32::DecodeArm(instruction)) + return *A32::GetNameARM(instruction); return ""; } -const char* GetNameOfA64Instruction(u32 instruction) { - /*if (auto decoder = A64::Decode(instruction)) { - return decoder->get().GetName(); - }*/ +std::string_view GetNameOfA64Instruction(u32 instruction) { + if (auto const decoder = A64::Decode(instruction)) + return *A64::GetName(instruction); return ""; } From 49c6096526da1571d49a8dfce9ddd8089fe1b366 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 14:08:17 +0000 Subject: [PATCH 37/47] [dynarmic] use ARCHITECTURE_ macros instead of MCL ones Signed-off-by: lizzie --- .../src/dynarmic/backend/exception_handler.h | 18 +-- .../backend/exception_handler_generic.cpp | 6 +- .../backend/exception_handler_macos.cpp | 12 +- .../backend/exception_handler_macos_mig.c | 4 +- .../backend/exception_handler_posix.cpp | 130 ++---------------- .../backend/exception_handler_windows.cpp | 4 +- src/dynarmic/src/dynarmic/common/context.h | 120 ++++++++++++++++ 7 files changed, 157 insertions(+), 137 deletions(-) create mode 100644 src/dynarmic/src/dynarmic/common/context.h diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler.h b/src/dynarmic/src/dynarmic/backend/exception_handler.h index 173949628c..cd274b111f 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler.h +++ b/src/dynarmic/src/dynarmic/backend/exception_handler.h @@ -15,15 +15,15 @@ #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) namespace Dynarmic::Backend::X64 { class BlockOfCode; } // namespace Dynarmic::Backend::X64 -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) namespace oaknut { class CodeBlock; } // namespace oaknut -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) namespace Dynarmic::Backend::RV64 { class CodeBlock; } // namespace Dynarmic::Backend::RV64 @@ 
-33,16 +33,16 @@ class CodeBlock; namespace Dynarmic::Backend { -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) struct FakeCall { u64 call_rip; u64 ret_rip; }; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) struct FakeCall { u64 call_pc; }; -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) struct FakeCall { }; #else @@ -54,11 +54,11 @@ public: ExceptionHandler(); ~ExceptionHandler(); -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void Register(X64::BlockOfCode& code); -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void Register(oaknut::CodeBlock& mem, std::size_t mem_size); -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void Register(RV64::CodeBlock& mem, std::size_t mem_size); #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp index ad7df25ca6..985536d9f0 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_generic.cpp @@ -13,15 +13,15 @@ struct ExceptionHandler::Impl final { ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode&) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock&, std::size_t) { // Do nothing } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock&, std::size_t) { // Do nothing } diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp index 52bcf5972f..76e517f05b 100644 --- 
a/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos.cpp @@ -25,7 +25,7 @@ #include "dynarmic/backend/exception_handler.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" # define mig_external extern "C" @@ -36,7 +36,7 @@ using dynarmic_thread_state_t = x86_thread_state64_t; -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # define mig_external extern "C" @@ -133,7 +133,7 @@ void MachHandler::MessagePump() { } } -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -151,7 +151,7 @@ kern_return_t MachHandler::HandleRequest(x86_thread_state64_t* ts) { return KERN_SUCCESS; } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) kern_return_t MachHandler::HandleRequest(arm_thread_state64_t* ts) { std::lock_guard guard(code_block_infos_mutex); @@ -269,13 +269,13 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c index 762a80ca42..25678ab115 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c 
+++ b/src/dynarmic/src/dynarmic/backend/exception_handler_macos_mig.c @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/mig/mach_exc_server.c" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/arm64/mig/mach_exc_server.c" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index 7695df57d2..ef28d99279 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -8,19 +8,6 @@ #include "dynarmic/backend/exception_handler.h" -#ifdef __APPLE__ -# include -# include -#else -# include -# ifndef __OpenBSD__ -# include -# endif -# ifdef __sun__ -# include -# endif -#endif - #include #include #include @@ -29,16 +16,17 @@ #include #include "dynarmic/common/assert.h" +#include "dynarmic/common/context.h" #include #include "dynarmic/common/common_types.h" -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/block_of_code.h" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include # include "dynarmic/backend/arm64/abi.h" -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) # include "dynarmic/backend/riscv64/code_block.h" #else # error "Invalid architecture" @@ -131,139 +119,51 @@ SigHandler::~SigHandler() { void SigHandler::AddCodeBlock(CodeBlockInfo cbi) { std::lock_guard guard(code_block_infos_mutex); - if (auto iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) { + if (auto const iter = FindCodeBlockInfo(cbi.code_begin); iter != code_block_infos.end()) code_block_infos.erase(iter); - } code_block_infos.push_back(cbi); } void SigHandler::RemoveCodeBlock(u64 host_pc) { std::lock_guard guard(code_block_infos_mutex); const 
auto iter = FindCodeBlockInfo(host_pc); - if (iter == code_block_infos.end()) { - return; - } - code_block_infos.erase(iter); + if (iter != code_block_infos.end()) + code_block_infos.erase(iter); } void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { ASSERT(sig == SIGSEGV || sig == SIGBUS); - -#ifndef MCL_ARCHITECTURE_RISCV - ucontext_t* ucontext = reinterpret_cast(raw_context); -#ifndef __OpenBSD__ - auto& mctx = ucontext->uc_mcontext; -#endif -#endif - -#if defined(MCL_ARCHITECTURE_X86_64) - -# if defined(__APPLE__) -# define CTX_RIP (mctx->__ss.__rip) -# define CTX_RSP (mctx->__ss.__rsp) -# elif defined(__linux__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# elif defined(__FreeBSD__) -# define CTX_RIP (mctx.mc_rip) -# define CTX_RSP (mctx.mc_rsp) -# elif defined(__NetBSD__) -# define CTX_RIP (mctx.__gregs[_REG_RIP]) -# define CTX_RSP (mctx.__gregs[_REG_RSP]) -# elif defined(__OpenBSD__) -# define CTX_RIP (ucontext->sc_rip) -# define CTX_RSP (ucontext->sc_rsp) -# elif defined(__sun__) -# define CTX_RIP (mctx.gregs[REG_RIP]) -# define CTX_RSP (mctx.gregs[REG_RSP]) -# else -# error "Unknown platform" -# endif - + CTX_DECLARE(raw_context); +#if defined(ARCHITECTURE_x86_64) { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_RIP); - CTX_RSP -= sizeof(u64); *mcl::bit_cast(CTX_RSP) = fc.ret_rip; CTX_RIP = fc.call_rip; - return; } } - fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_RIP); - -#elif defined(MCL_ARCHITECTURE_ARM64) - -# if defined(__APPLE__) -# define CTX_PC (mctx->__ss.__pc) -# define CTX_SP (mctx->__ss.__sp) -# define CTX_LR (mctx->__ss.__lr) -# define CTX_X(i) (mctx->__ss.__x[i]) -# define CTX_Q(i) (mctx->__ns.__v[i]) -# elif defined(__linux__) -# define CTX_PC (mctx.pc) -# define CTX_SP (mctx.sp) -# define CTX_LR (mctx.regs[30]) -# define CTX_X(i) (mctx.regs[i]) -# define CTX_Q(i) (fpctx->vregs[i]) - [[maybe_unused]] const auto fpctx = [&mctx] { - _aarch64_ctx* header = (_aarch64_ctx*)&mctx.__reserved; - while (header->magic != FPSIMD_MAGIC) { - ASSERT(header->magic && header->size); - header = (_aarch64_ctx*)((char*)header + header->size); - } - return (fpsimd_context*)header; - }(); -# elif defined(__FreeBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__NetBSD__) -# define CTX_PC (mctx.mc_gpregs.gp_elr) -# define CTX_SP (mctx.mc_gpregs.gp_sp) -# define CTX_LR (mctx.mc_gpregs.gp_lr) -# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) -# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) -# elif defined(__OpenBSD__) -# define CTX_PC (ucontext->sc_elr) -# define CTX_SP (ucontext->sc_sp) -# define CTX_LR (ucontext->sc_lr) -# define CTX_X(i) (ucontext->sc_x[i]) -# define CTX_Q(i) (ucontext->sc_q[i]) -# else -# error "Unknown platform" -# endif - +#elif defined(ARCHITECTURE_arm64) + CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); - const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); if (iter != sig_handler->code_block_infos.end()) { FakeCall fc = iter->cb(CTX_PC); - CTX_PC = fc.call_pc; - return; } } - fmt::print(stderr, "Unhandled {} at pc {:#018x}\n", sig == SIGSEGV ? 
"SIGSEGV" : "SIGBUS", CTX_PC); - -#elif defined(MCL_ARCHITECTURE_RISCV) - +#elif defined(ARCHITECTURE_riscv64) ASSERT_FALSE("Unimplemented"); - #else - # error "Invalid architecture" - #endif struct sigaction* retry_sa = sig == SIGSEGV ? &sig_handler->old_sa_segv : &sig_handler->old_sa_bus; @@ -309,19 +209,19 @@ private: ExceptionHandler::ExceptionHandler() = default; ExceptionHandler::~ExceptionHandler() = default; -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) void ExceptionHandler::Register(X64::BlockOfCode& code) { const u64 code_begin = mcl::bit_cast(code.getCode()); const u64 code_end = code_begin + code.GetTotalCodeSize(); impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; impl = std::make_unique(code_begin, code_end); } -#elif defined(MCL_ARCHITECTURE_RISCV) +#elif defined(ARCHITECTURE_riscv64) void ExceptionHandler::Register(RV64::CodeBlock& mem, std::size_t size) { const u64 code_begin = mcl::bit_cast(mem.ptr()); const u64 code_end = code_begin + size; diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp index 719c007594..9e76cae912 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_windows.cpp @@ -5,9 +5,9 @@ #include -#if defined(MCL_ARCHITECTURE_X86_64) +#if defined(ARCHITECTURE_x86_64) # include "dynarmic/backend/x64/exception_handler_windows.cpp" -#elif defined(MCL_ARCHITECTURE_ARM64) +#elif defined(ARCHITECTURE_arm64) # include "dynarmic/backend/exception_handler_generic.cpp" #else # error "Invalid architecture" diff --git a/src/dynarmic/src/dynarmic/common/context.h b/src/dynarmic/src/dynarmic/common/context.h new file 
mode 100644 index 0000000000..0eb128449c --- /dev/null +++ b/src/dynarmic/src/dynarmic/common/context.h @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#ifdef __APPLE__ +# include +# include +#else +# include +# ifndef __OpenBSD__ +# include +# endif +# ifdef __sun__ +# include +# endif +# ifdef __linux__ +# include +# endif +#endif + +#ifdef ARCHITECTURE_x86_64 +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; +# endif +#elif defined(ARCHITECTURE_arm64) +# ifdef __OpenBSD__ +# define CTX_DECLARE(raw_context) ucontext_t* ucontext = reinterpret_cast(raw_context); +# else +# define CTX_DECLARE(raw_context) \ + ucontext_t* ucontext = reinterpret_cast(raw_context); \ + [[maybe_unused]] auto& mctx = ucontext->uc_mcontext; \ + [[maybe_unused]] const auto fpctx = GetFloatingPointState(mctx); +# endif +#endif + +#if defined(ARCHITECTURE_x86_64) +# if defined(__APPLE__) +# define CTX_RIP (mctx->__ss.__rip) +# define CTX_RSP (mctx->__ss.__rsp) +# elif defined(__linux__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# elif defined(__FreeBSD__) +# define CTX_RIP (mctx.mc_rip) +# define CTX_RSP (mctx.mc_rsp) +# elif defined(__NetBSD__) +# define CTX_RIP (mctx.__gregs[_REG_RIP]) +# define CTX_RSP (mctx.__gregs[_REG_RSP]) +# elif defined(__OpenBSD__) +# define CTX_RIP (ucontext->sc_rip) +# define CTX_RSP (ucontext->sc_rsp) +# elif defined(__sun__) +# define CTX_RIP (mctx.gregs[REG_RIP]) +# define CTX_RSP (mctx.gregs[REG_RSP]) +# else +# error "Unknown platform" +# endif +#elif defined(ARCHITECTURE_arm64) +# if defined(__APPLE__) +# define CTX_PC (mctx->__ss.__pc) +# define CTX_SP (mctx->__ss.__sp) +# define CTX_LR (mctx->__ss.__lr) 
+# define CTX_PSTATE (mctx->__ss.__cpsr) +# define CTX_X(i) (mctx->__ss.__x[i]) +# define CTX_Q(i) (mctx->__ns.__v[i]) +# define CTX_FPSR (mctx->__ns.__fpsr) +# define CTX_FPCR (mctx->__ns.__fpcr) +# elif defined(__linux__) +# define CTX_PC (mctx.pc) +# define CTX_SP (mctx.sp) +# define CTX_LR (mctx.regs[30]) +# define CTX_PSTATE (mctx.pstate) +# define CTX_X(i) (mctx.regs[i]) +# define CTX_Q(i) (fpctx->vregs[i]) +# define CTX_FPSR (fpctx->fpsr) +# define CTX_FPCR (fpctx->fpcr) +# elif defined(__FreeBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__NetBSD__) +# define CTX_PC (mctx.mc_gpregs.gp_elr) +# define CTX_SP (mctx.mc_gpregs.gp_sp) +# define CTX_LR (mctx.mc_gpregs.gp_lr) +# define CTX_X(i) (mctx.mc_gpregs.gp_x[i]) +# define CTX_Q(i) (mctx.mc_fpregs.fp_q[i]) +# elif defined(__OpenBSD__) +# define CTX_PC (ucontext->sc_elr) +# define CTX_SP (ucontext->sc_sp) +# define CTX_LR (ucontext->sc_lr) +# define CTX_X(i) (ucontext->sc_x[i]) +# define CTX_Q(i) (ucontext->sc_q[i]) +# else +# error "Unknown platform" +# endif +#else +# error "unimplemented" +#endif + +#ifdef ARCHITECTURE_arm64 +#ifdef __APPLE__ +inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) { + return &(host_ctx->__ns); +} +#elif defined(__linux__) +inline fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) { + _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved); + while (header->magic != FPSIMD_MAGIC) + header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast(header) + header->size); + return reinterpret_cast(header); +} +#endif +#endif From c2c19e8bcb67bedceb4afb3b20c7efdf2652d252 Mon Sep 17 00:00:00 2001 From: lizzie Date: Sun, 7 Sep 2025 17:03:36 +0000 Subject: [PATCH 38/47] [dynarmic] fix android Signed-off-by: lizzie --- 
src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp index ef28d99279..b22befaff9 100644 --- a/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp +++ b/src/dynarmic/src/dynarmic/backend/exception_handler_posix.cpp @@ -149,7 +149,6 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) { } fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP); #elif defined(ARCHITECTURE_arm64) - CTX_DECLARE(raw_context); { std::lock_guard guard(sig_handler->code_block_infos_mutex); const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); From 5a44dd9bafdb6e475f1a33458279ae567c9a31f7 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 02:47:04 +0000 Subject: [PATCH 39/47] [dynarmic, cmake] remove unused frontends var Signed-off-by: lizzie --- src/dynarmic/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dynarmic/CMakeLists.txt b/src/dynarmic/CMakeLists.txt index 38457deb50..5fbb03b80e 100644 --- a/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/CMakeLists.txt @@ -39,9 +39,6 @@ option(DYNARMIC_INSTALL "Install dynarmic headers and CMake files" OFF) option(DYNARMIC_USE_BUNDLED_EXTERNALS "Use all bundled externals (useful when e.g. 
cross-compiling)" OFF) option(DYNARMIC_WARNINGS_AS_ERRORS "Warnings as errors" ${MASTER_PROJECT}) option(DYNARMIC_ENABLE_LTO "Enable LTO" OFF) -if (NOT DEFINED DYNARMIC_FRONTENDS) - set(DYNARMIC_FRONTENDS "A32;A64" CACHE STRING "Selects which frontends to enable") -endif() # Default to a Release build if (NOT CMAKE_BUILD_TYPE) From ca10d3b95daa268474f6cae1c9dfc7b96396dc8d Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 8 Sep 2025 03:36:35 +0000 Subject: [PATCH 40/47] [dynarmic, docs] fastmem docs Signed-off-by: lizzie --- src/dynarmic/docs/FastMemory.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/dynarmic/docs/FastMemory.md b/src/dynarmic/docs/FastMemory.md index 2a7c1a2583..c4f57996ba 100644 --- a/src/dynarmic/docs/FastMemory.md +++ b/src/dynarmic/docs/FastMemory.md @@ -1,5 +1,19 @@ # Fast memory (Fastmem) +The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amount of code space (push + call + pop). They trash the i-cache (due to an indirect call) and overall make code emission more bloated. + +The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such a mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls. While this sacrifices i-cache coherency, it allows for smaller code-size and "faster" throughput. + +Many kernels however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching. Hence this feature is better suited for them only. 
+ ![Host to guest translation](./HostToGuest.svg) ![Fastmem translation](./Fastmem.svg) + +In x86_64 for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler, and the OS will then transform that into something that can be sent into userspace. + +Most modern OSes implement kernel-page-table-isolation, which means a set of system calls will invoke a context switch (not often used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, often used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs). + +Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: VDSO will take care of the entire thing in the same address space, making dispatching as costly as an indirect call - without the hazards of increased code size. + +The main downside from this is the constant i-cache thrashing and pipeline hazards introduced by the VDSO signal handlers. However on most benchmarks fastmem does perform faster than without (Linux only). This also abuses the fact of continuous address space emulation by using an arena - which can then be potentially transparently mapped into a hugepage, reducing TLB walk times. 
From 60785c5b466059beadb242d22b11a5b4d38872c6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:29 +0000 Subject: [PATCH 41/47] [dynarmic] remove use of mcl reverse iterator Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 2 +- src/dynarmic/src/dynarmic/backend/x64/abi.cpp | 8 ++- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 65 ++++++++----------- 3 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1e673338a8..1691bbb3b7 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -764,7 +764,7 @@ void A64EmitX64::EmitPatchMovRcx(CodePtr target_code_ptr) { target_code_ptr = code.GetReturnFromRunCodeAddress(); } const CodePtr patch_location = code.getCurr(); - code.mov(code.rcx, reinterpret_cast(target_code_ptr)); + code.mov(code.rcx, u64(target_code_ptr)); code.EnsurePatchLocationSize(patch_location, 10); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp index a9bbab3d10..299bf1d1d6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -10,7 +10,6 @@ #include -#include #include "dynarmic/common/common_types.h" #include @@ -76,7 +75,8 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size); size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE); - for (auto const xmm : mcl::iterator::reverse(regs)) { + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const xmm = *it; if (HostLocIsXMM(xmm)) { xmm_offset -= XMM_SIZE; if (code.HasHostFeature(HostFeature::AVX)) { @@ -88,9 +88,11 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, } if 
(frame_info.stack_subtraction != 0) code.add(rsp, u32(frame_info.stack_subtraction)); - for (auto const gpr : mcl::iterator::reverse(regs)) + for (auto it = regs.rbegin(); it != regs.rend(); ++it) { + auto const gpr = *it; if (HostLocIsGPR(gpr)) code.pop(HostLocToReg64(gpr)); + } } void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) { diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 383b915839..88fc34fa10 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -28,7 +28,6 @@ #include "dynarmic/ir/type.h" #include "mcl/bit/swap.hpp" #include "mcl/bit/rotate.hpp" -#include "mcl/iterator/reverse.hpp" namespace Dynarmic::Optimization { @@ -36,50 +35,42 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { case IR::Opcode::A32ReadMemory8: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u8 value_from_memory = cb->MemoryRead8(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u8 value_from_memory = cb->MemoryRead8(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory16: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u16 value_from_memory = cb->MemoryRead16(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u16 value_from_memory = cb->MemoryRead16(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; 
} case IR::Opcode::A32ReadMemory32: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u32 value_from_memory = cb->MemoryRead32(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u32 value_from_memory = cb->MemoryRead32(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } case IR::Opcode::A32ReadMemory64: { - if (!inst.AreAllArgsImmediates()) { - break; - } - - const u32 vaddr = inst.GetArg(1).GetU32(); - if (cb->IsReadOnlyMemory(vaddr)) { - const u64 value_from_memory = cb->MemoryRead64(vaddr); - inst.ReplaceUsesWith(IR::Value{value_from_memory}); + if (inst.AreAllArgsImmediates()) { + const u32 vaddr = inst.GetArg(1).GetU32(); + if (cb->IsReadOnlyMemory(vaddr)) { + const u64 value_from_memory = cb->MemoryRead64(vaddr); + inst.ReplaceUsesWith(IR::Value{value_from_memory}); + } } break; } @@ -1205,11 +1196,9 @@ static void ConstantPropagation(IR::Block& block) { static void DeadCodeElimination(IR::Block& block) { // We iterate over the instructions in reverse order. // This is because removing an instruction reduces the number of uses for earlier instructions. 
- for (auto& inst : mcl::iterator::reverse(block)) { - if (!inst.HasUses() && !MayHaveSideEffects(inst.GetOpcode())) { - inst.Invalidate(); - } - } + for (auto it = block.rbegin(); it != block.rend(); ++it) + if (!it->HasUses() && !MayHaveSideEffects(it->GetOpcode())) + it->Invalidate(); } static void IdentityRemovalPass(IR::Block& block) { From ac439e5c9546888767c475abdd8f010247c8e796 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:28:41 +0000 Subject: [PATCH 42/47] [dynarmic] checked code alignment Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_interface.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp index c65b582982..33acc46dfa 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_interface.cpp @@ -80,16 +80,16 @@ public: }; // TODO: Check code alignment - - const CodePtr current_code_ptr = [this] { + const CodePtr aligned_code_ptr = CodePtr((uintptr_t(GetCurrentBlock()) + 15) & ~uintptr_t(15)); + const CodePtr current_code_ptr = [this, aligned_code_ptr] { // RSB optimization const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask; if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) { jit_state.rsb_ptr = new_rsb_ptr; - return reinterpret_cast(jit_state.rsb_codeptrs[new_rsb_ptr]); + return CodePtr(jit_state.rsb_codeptrs[new_rsb_ptr]); } - - return GetCurrentBlock(); + return aligned_code_ptr; + //return GetCurrentBlock(); }(); const HaltReason hr = block_of_code.RunCode(&jit_state, current_code_ptr); From 036c946e0fb581a0ab70e08d26801d01615ca661 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 03:31:12 +0000 Subject: [PATCH 43/47] [dynarmic] fix hardcoded AVX512 registers, use xmm0 instead of xmm16 to align with spec Signed-off-by: lizzie --- 
.../backend/x64/emit_x64_floating_point.cpp | 7 ++----- .../src/dynarmic/backend/x64/emit_x64_vector.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 47e51acb03..c7d1f8aa1c 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -92,13 +92,10 @@ void ForceDenormalsToZero(BlockOfCode& code, std::initializer_list t FpFixup::Norm_Src, FpFixup::Norm_Src, FpFixup::Norm_Src); - - const Xbyak::Xmm tmp = xmm16; + const Xbyak::Xmm tmp = xmm0; FCODE(vmovap)(tmp, code.BConst(xword, denormal_to_zero)); - - for (const Xbyak::Xmm& xmm : to_daz) { + for (const Xbyak::Xmm& xmm : to_daz) FCODE(vfixupimms)(xmm, xmm, tmp, u8(0)); - } return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index 99000c2a57..c6b0e3b864 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -609,8 +609,8 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -674,8 +674,8 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const 
Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -1955,8 +1955,8 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = xmm16; - const Xbyak::Xmm tmp = xmm17; + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -2737,7 +2737,7 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { - const Xbyak::Xmm c = xmm16; + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); code.vpsraq(c, a, 32); code.vpsllq(a, a, 32); code.vpsraq(a, a, 32); @@ -5461,7 +5461,7 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = xmm16; + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); From 32b61e3ecf8eb68f33d48c6f0884719dd2f078dd Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 10:33:12 +0000 Subject: [PATCH 44/47] [dynarmic] unconditional branches always take Signed-off-by: lizzie --- src/dynarmic/docs/Design.md | 49 +++++++++++++------ 
src/dynarmic/docs/RegisterAllocator.md | 3 ++ .../dynarmic/backend/x64/block_of_code.cpp | 4 +- .../src/dynarmic/backend/x64/reg_alloc.cpp | 7 +-- .../A64/translate/impl/a64_branch.cpp | 9 ++-- 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/dynarmic/docs/Design.md b/src/dynarmic/docs/Design.md index 3c0deb5972..ffa8ccecdb 100644 --- a/src/dynarmic/docs/Design.md +++ b/src/dynarmic/docs/Design.md @@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR) ### Callback: {Read,Write}Memory{8,16,32,64} - ReadMemory8( vaddr) - ReadMemory16( vaddr) - ReadMemory32( vaddr) - ReadMemory64( vaddr) - WriteMemory8( vaddr, value_to_store) - WriteMemory16( vaddr, value_to_store) - WriteMemory32( vaddr, value_to_store) - WriteMemory64( vaddr, value_to_store) +```c++ + ReadMemory8( vaddr) + ReadMemory16( vaddr) + ReadMemory32( vaddr) + ReadMemory64( vaddr) + WriteMemory8( vaddr, value_to_store) + WriteMemory16( vaddr, value_to_store) + WriteMemory32( vaddr, value_to_store) + WriteMemory64( vaddr, value_to_store) +``` Memory access. ### Terminal: Interpret - SetTerm(IR::Term::Interpret{next}) +```c++ +SetTerm(IR::Term::Interpret{next}) +``` This terminal instruction calls the interpreter, starting at `next`. The interpreter must interpret exactly one instruction. ### Terminal: ReturnToDispatch - SetTerm(IR::Term::ReturnToDispatch{}) +```c++ +SetTerm(IR::Term::ReturnToDispatch{}) +``` This terminal instruction returns control to the dispatcher. The dispatcher will use the value in R15 to determine what comes next. ### Terminal: LinkBlock - SetTerm(IR::Term::LinkBlock{next}) +```c++ +SetTerm(IR::Term::LinkBlock{next}) +``` This terminal instruction jumps to the basic block described by `next` if we have enough cycles remaining. If we do not have enough cycles remaining, we return to the dispatcher, which will return control to the host. 
+### Terminal: LinkBlockFast + +```c++ +SetTerm(IR::Term::LinkBlockFast{next}) +``` + +This terminal instruction jumps to the basic block described by `next` unconditionally. +This promises guarantees that must be upheld at runtime - i.e. that the program won't hang. + ### Terminal: PopRSBHint - SetTerm(IR::Term::PopRSBHint{}) +```c++ +SetTerm(IR::Term::PopRSBHint{}) +``` This terminal instruction checks the top of the Return Stack Buffer against R15. If RSB lookup fails, control is returned to the dispatcher. This is an optimization for faster function calls. A backend that doesn't support this optimization or doesn't have a RSB may choose to implement this exactly as -ReturnToDispatch. +`ReturnToDispatch`. ### Terminal: If - SetTerm(IR::Term::If{cond, term_then, term_else}) +```c++ +SetTerm(IR::Term::If{cond, term_then, term_else}) +``` This terminal instruction conditionally executes one terminal or another depending on the run-time state of the ARM flags. diff --git a/src/dynarmic/docs/RegisterAllocator.md b/src/dynarmic/docs/RegisterAllocator.md index 5a7210c791..f5bbaaf168 100644 --- a/src/dynarmic/docs/RegisterAllocator.md +++ b/src/dynarmic/docs/RegisterAllocator.md @@ -19,6 +19,9 @@ The member functions on `RegAlloc` are just a combination of the above concepts. The following registers are reserved for internal use and should NOT participate in register allocation: - `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access. - `%rsp`: Stack pointer. +- `%r15`: JIT pointer. +- `%r14`: Page table pointer. +- `%r13`: Fastmem pointer. The layout convention keeps `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate.
diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..31b10ec6d5 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -364,8 +364,7 @@ void BlockOfCode::GenRunCode(std::function rcp) { cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0); jne(return_to_caller_mxcsr_already_exited, T_NEAR); - lock(); - or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); + lock(); or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast(HaltReason::Step)); SwitchMxcsrOnEntry(); jmp(ABI_PARAM2); @@ -415,7 +414,6 @@ void BlockOfCode::GenRunCode(std::function rcp) { } xor_(eax, eax); - lock(); xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax); ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 919e75b77b..502a093d08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -9,6 +9,7 @@ #include "dynarmic/backend/x64/reg_alloc.h" #include +#include #include #include @@ -152,19 +153,19 @@ bool Argument::GetImmediateU1() const noexcept { u8 Argument::GetImmediateU8() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100); + ASSERT(imm <= u64(std::numeric_limits::max())); return u8(imm); } u16 Argument::GetImmediateU16() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x10000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u16(imm); } u32 Argument::GetImmediateU32() const noexcept { const u64 imm = value.GetImmediateAsU64(); - ASSERT(imm < 0x100000000); + ASSERT(imm <= u64(std::numeric_limits::max())); return u32(imm); } diff --git a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp 
b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp index faf0686231..1bb0be823a 100644 --- a/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp +++ b/src/dynarmic/src/dynarmic/frontend/A64/translate/impl/a64_branch.cpp @@ -20,9 +20,12 @@ bool TranslatorVisitor::B_cond(Imm<19> imm19, Cond cond) { bool TranslatorVisitor::B_uncond(Imm<26> imm26) { const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend(); const u64 target = ir.PC() + offset; - - //ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); - ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + // Pattern to halt execution (B .) + if (target == ir.PC()) { + ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)}); + return false; + } + ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)}); return false; } From cf5e8b7b17c3a7bfa5d03dcd76c998cb3c31406c Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 11:04:34 +0000 Subject: [PATCH 45/47] [dynarmic] Implement constant folding for CountLeadingZeros, add readXX constnat folding for A64 Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 48 +++++++++++++++------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 88fc34fa10..750a8a496b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -31,10 +31,11 @@ namespace Dynarmic::Optimization { -static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { +static void ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { for (auto& inst : block) { switch (inst.GetOpcode()) { - case IR::Opcode::A32ReadMemory8: { + case IR::Opcode::A32ReadMemory8: + case IR::Opcode::A64ReadMemory8: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ 
-44,7 +45,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory16: { + case IR::Opcode::A32ReadMemory16: + case IR::Opcode::A64ReadMemory16: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -54,7 +56,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory32: { + case IR::Opcode::A32ReadMemory32: + case IR::Opcode::A64ReadMemory32: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -64,7 +67,8 @@ static void A32ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) { } break; } - case IR::Opcode::A32ReadMemory64: { + case IR::Opcode::A32ReadMemory64: + case IR::Opcode::A64ReadMemory64: { if (inst.AreAllArgsImmediates()) { const u32 vaddr = inst.GetArg(1).GetU32(); if (cb->IsReadOnlyMemory(vaddr)) { @@ -667,14 +671,14 @@ using Op = Dynarmic::IR::Opcode; // bit size all over the place within folding functions. static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) { if (is_32_bit) { - inst.ReplaceUsesWith(IR::Value{static_cast(value)}); + inst.ReplaceUsesWith(IR::Value{u32(value)}); } else { inst.ReplaceUsesWith(IR::Value{value}); } } static IR::Value Value(bool is_32_bit, u64 value) { - return is_32_bit ? IR::Value{static_cast(value)} : IR::Value{value}; + return is_32_bit ? IR::Value{u32(value)} : IR::Value{value}; } template @@ -800,6 +804,23 @@ static void FoldByteReverse(IR::Inst& inst, Op op) { } } +/// Folds leading zero population count +/// +/// 1. 
imm -> countl_zero(imm) +/// +static void FoldCountLeadingZeros(IR::Inst& inst, bool is_32_bit) { + const auto operand = inst.GetArg(0); + if (operand.IsImmediate()) { + if (is_32_bit) { + const u32 result = std::countl_zero(u32(operand.GetImmediateAsU64())); + inst.ReplaceUsesWith(IR::Value{result}); + } else { + const u64 result = std::countl_zero(operand.GetImmediateAsU64()); + inst.ReplaceUsesWith(IR::Value{result}); + } + } +} + /// Folds division operations based on the following: /// /// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) @@ -1020,8 +1041,7 @@ static void FoldZeroExtendXToLong(IR::Inst& inst) { static void ConstantPropagation(IR::Block& block) { for (auto& inst : block) { - const auto opcode = inst.GetOpcode(); - + auto const opcode = inst.GetOpcode(); switch (opcode) { case Op::LeastSignificantWord: FoldLeastSignificantWord(inst); @@ -1110,12 +1130,12 @@ static void ConstantPropagation(IR::Block& block) { break; case Op::ArithmeticShiftRightMasked32: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, true, static_cast(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); + ReplaceUsesWith(inst, true, s32(inst.GetArg(0).GetU32()) >> (inst.GetArg(1).GetU32() & 0x1f)); } break; case Op::ArithmeticShiftRightMasked64: if (inst.AreAllArgsImmediates()) { - ReplaceUsesWith(inst, false, static_cast(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); + ReplaceUsesWith(inst, false, s64(inst.GetArg(0).GetU64()) >> (inst.GetArg(1).GetU64() & 0x3f)); } break; case Op::RotateRightMasked32: @@ -1187,6 +1207,10 @@ static void ConstantPropagation(IR::Block& block) { case Op::ByteReverseDual: FoldByteReverse(inst, opcode); break; + case Op::CountLeadingZeros32: + case Op::CountLeadingZeros64: + FoldCountLeadingZeros(inst, opcode == Op::CountLeadingZeros32); + break; default: break; } @@ -1462,7 +1486,7 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: 
Optimization::DeadCodeElimination(block); } if (conf.HasOptimization(OptimizationFlag::ConstProp)) [[likely]] { - Optimization::A32ConstantMemoryReads(block, conf.callbacks); + Optimization::ConstantMemoryReads(block, conf.callbacks); Optimization::ConstantPropagation(block); Optimization::DeadCodeElimination(block); } From 6241b6d794fed4fc3f4c977babbc58c2cf26734c Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 14:46:35 +0000 Subject: [PATCH 46/47] [dynarmic] regalloc use scratchimpl that uses all instead of iteraiting Signed-off-by: lizzie --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 4 +-- .../src/dynarmic/backend/x64/reg_alloc.cpp | 30 ++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 1691bbb3b7..fa1f071fbf 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { auto const opcode = inst.GetOpcode(); // Call the relevant Emit* member function. switch (opcode) { -#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch; +#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch; #define A32OPC(name, type, ...) -#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch; +#define A64OPC(name, type, ...) 
case IR::Opcode::A64##name: goto a64_branch; #include "dynarmic/ir/opcodes.inc" #undef OPCODE #undef A32OPC diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 502a093d08..2db817a90f 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, if (result_def) { DefineValueImpl(result_def, ABI_RETURN); } - + for (size_t i = 0; i < args.size(); i++) { + if (args[i]) { + UseScratch(*args[i], args_hostloc[i]); + } else { + ScratchGpr(args_hostloc[i]); // TODO: Force spill + } + } + // Must match with with ScratchImpl + for (auto const gpr : other_caller_save) { + MoveOutOfTheWay(gpr); + LocInfo(gpr).WriteLock(); + } for (size_t i = 0; i < args.size(); i++) { if (args[i] && !args[i]->get().IsVoid()) { - UseScratch(*args[i], args_hostloc[i]); // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { @@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } } - - for (size_t i = 0; i < args.size(); i++) - if (!args[i]) { - // TODO: Force spill - ScratchGpr(args_hostloc[i]); - } - for (auto const caller_saved : other_caller_save) - ScratchImpl({caller_saved}); } void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { @@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept { } HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { -#if 0 // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows // but it's fine anyways. We can find other ways to cheat it later - but which?!?! // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end? // TODO(lizzie): This needs to be investigated further later. 
// Do not spill XMM into other XMM silly - if (!is_xmm) { + /*if (!is_xmm) { // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY // Intel recommends to spill GPR onto XMM registers IF POSSIBLE // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call? @@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept { for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) return loc; - } -#endif + }*/ + // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits + // of spilling on XMM versus the potential cost of using XMM registers..... // Otherwise go to stack spilling for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i) if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty()) From f1b848d6e9b3d61066853b893d7206b8e96ef665 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 9 Sep 2025 18:29:39 +0000 Subject: [PATCH 47/47] [dynarmic] use better boost::visitor Signed-off-by: lizzie --- src/dynarmic/src/dynarmic/CMakeLists.txt | 1 - .../src/dynarmic/backend/x64/a32_emit_x64.cpp | 1 - .../src/dynarmic/backend/x64/emit_x64.cpp | 6 ++-- .../src/dynarmic/common/variant_util.h | 29 ------------------- src/dynarmic/tests/A32/fuzz_arm.cpp | 4 +-- 5 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 src/dynarmic/src/dynarmic/common/variant_util.h diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt index d6be793dc6..3d85f1de29 100644 --- a/src/dynarmic/src/dynarmic/CMakeLists.txt +++ b/src/dynarmic/src/dynarmic/CMakeLists.txt @@ -61,7 +61,6 @@ add_library(dynarmic common/string_util.h common/u128.cpp common/u128.h - common/variant_util.h frontend/A32/a32_types.cpp frontend/A32/a32_types.h frontend/A64/a64_types.cpp diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp 
b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index fb306336cf..3186758380 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -28,7 +28,6 @@ #include "dynarmic/backend/x64/nzcv_util.h" #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/frontend/A32/a32_location_descriptor.h" #include "dynarmic/frontend/A32/a32_types.h" #include "dynarmic/interface/A32/coprocessor.h" diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 3bc93e6fd5..0bb2604025 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -11,6 +11,7 @@ #include #include "dynarmic/common/assert.h" +#include #include #include #include "dynarmic/common/common_types.h" @@ -21,7 +22,6 @@ #include "dynarmic/backend/x64/perf_map.h" #include "dynarmic/backend/x64/stack_layout.h" #include "dynarmic/backend/x64/verbose_debugging_output.h" -#include "dynarmic/common/variant_util.h" #include "dynarmic/ir/basic_block.h" #include "dynarmic/ir/microinstruction.h" #include "dynarmic/ir/opcodes.h" @@ -347,14 +347,14 @@ EmitX64::BlockDescriptor EmitX64::RegisterBlock(const IR::LocationDescriptor& de } void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location, bool is_single_step) { - Common::VisitVariant(terminal, [this, initial_location, is_single_step](auto x) { + boost::apply_visitor([this, initial_location, is_single_step](auto x) { using T = std::decay_t; if constexpr (!std::is_same_v) { this->EmitTerminalImpl(x, initial_location, is_single_step); } else { ASSERT_MSG(false, "Invalid terminal"); } - }); + }, terminal); } void EmitX64::Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr) { diff --git 
a/src/dynarmic/src/dynarmic/common/variant_util.h b/src/dynarmic/src/dynarmic/common/variant_util.h deleted file mode 100644 index 4dd7f67167..0000000000 --- a/src/dynarmic/src/dynarmic/common/variant_util.h +++ /dev/null @@ -1,29 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#pragma once - -#include - -namespace Dynarmic::Common { -namespace detail { - -template -struct VariantVisitor : boost::static_visitor - , Lambda { - VariantVisitor(Lambda&& lambda) - : Lambda(std::move(lambda)) {} - - using Lambda::operator(); -}; - -} // namespace detail - -template -inline ReturnT VisitVariant(Variant&& variant, Lambda&& lambda) { - return boost::apply_visitor(detail::VariantVisitor(std::move(lambda)), variant); -} - -} // namespace Dynarmic::Common diff --git a/src/dynarmic/tests/A32/fuzz_arm.cpp b/src/dynarmic/tests/A32/fuzz_arm.cpp index 5ee6d2bf02..bef473a491 100644 --- a/src/dynarmic/tests/A32/fuzz_arm.cpp +++ b/src/dynarmic/tests/A32/fuzz_arm.cpp @@ -47,7 +47,7 @@ using namespace Dynarmic; template bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { - return Common::VisitVariant(terminal, [&](auto t) -> bool { + return boost::apply_visitor([&](auto t) -> bool { using T = std::decay_t; if constexpr (std::is_same_v) { return false; @@ -73,7 +73,7 @@ bool AnyLocationDescriptorForTerminalHas(IR::Terminal terminal, Fn fn) { ASSERT_MSG(false, "Invalid terminal type"); return false; } - }); + }, terminal); } bool ShouldTestInst(u32 instruction, u32 pc, bool is_thumb, bool is_last_inst, A32::ITState it_state = {}) {