From 9e15e31dd3d3c5261c24216c10633ad833c0c06b Mon Sep 17 00:00:00 2001
From: lizzie
Date: Tue, 9 Sep 2025 14:46:35 +0000
Subject: [PATCH] [dynarmic] regalloc: use ScratchImpl that uses all instead of iterating

Signed-off-by: lizzie
---
 .../src/dynarmic/backend/x64/a64_emit_x64.cpp |  4 +--
 .../src/dynarmic/backend/x64/reg_alloc.cpp    | 30 ++++++++++---------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
index 1691bbb3b7..fa1f071fbf 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp
@@ -122,9 +122,9 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
         auto const opcode = inst.GetOpcode();
         // Call the relevant Emit* member function.
         switch (opcode) {
-#define OPCODE(name, type, ...) [[likely]] case IR::Opcode::name: goto opcode_branch;
+#define OPCODE(name, type, ...) case IR::Opcode::name: goto opcode_branch;
 #define A32OPC(name, type, ...)
-#define A64OPC(name, type, ...) [[likely]] case IR::Opcode::A64##name: goto a64_branch;
+#define A64OPC(name, type, ...) case IR::Opcode::A64##name: goto a64_branch;
 #include "dynarmic/ir/opcodes.inc"
 #undef OPCODE
 #undef A32OPC
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index 502a093d08..2db817a90f 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -367,10 +367,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     if (result_def) {
         DefineValueImpl(result_def, ABI_RETURN);
     }
-
+    for (size_t i = 0; i < args.size(); i++) {
+        if (args[i]) {
+            UseScratch(*args[i], args_hostloc[i]);
+        } else {
+            ScratchGpr(args_hostloc[i]); // TODO: Force spill
+        }
+    }
+    // Must match with ScratchImpl
+    for (auto const gpr : other_caller_save) {
+        MoveOutOfTheWay(gpr);
+        LocInfo(gpr).WriteLock();
+    }
     for (size_t i = 0; i < args.size(); i++) {
         if (args[i] && !args[i]->get().IsVoid()) {
-            UseScratch(*args[i], args_hostloc[i]);
             // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
             const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
             switch (args[i]->get().GetType()) {
@@ -390,14 +400,6 @@ void RegAlloc::HostCall(IR::Inst* result_def,
             }
         }
     }
-
-    for (size_t i = 0; i < args.size(); i++)
-        if (!args[i]) {
-            // TODO: Force spill
-            ScratchGpr(args_hostloc[i]);
-        }
-    for (auto const caller_saved : other_caller_save)
-        ScratchImpl({caller_saved});
 }
 
 void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
@@ -560,13 +562,12 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
 }
 
 HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
-#if 0
     // TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
     // but it's fine anyways. We can find other ways to cheat it later - but which?!?!
     // we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
    // TODO(lizzie): This needs to be investigated further later.
     // Do not spill XMM into other XMM silly
-    if (!is_xmm) {
+    /*if (!is_xmm) {
         // TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
         // Intel recommends to spill GPR onto XMM registers IF POSSIBLE
         // TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
@@ -574,8 +575,9 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
         for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
             if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
                 return loc;
-    }
-#endif
+    }*/
+    // TODO: Doing this would mean saving XMM on each call... need to benchmark the benefits
+    // of spilling on XMM versus the potential cost of using XMM registers.
     // Otherwise go to stack spilling
     for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
         if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
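
Aside on the zero-extension loop retained in HostCall: LLVM's x86-64 lowering
leaves widening of 8- and 16-bit arguments to the caller rather than the
callee, so HostCall must movzx narrow arguments before dispatching. Below is a
minimal Xbyak sketch of that widening; ZeroExtendSketch and the eax/al/ax
register choices are illustrative stand-ins for the register picked by
HostLocToReg64, not dynarmic's actual emitter code:

    #include <xbyak/xbyak.h>

    // Sketch: widen narrow values the way a caller must before a host call.
    struct ZeroExtendSketch : Xbyak::CodeGenerator {
        ZeroExtendSketch() {
            movzx(eax, al); // U8: zero-extend the low byte into the register
            movzx(eax, ax); // U16: zero-extend the low word
            mov(eax, eax);  // U32: a 32-bit write already clears bits 63:32
            ret();
        }
    };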