From 623baac26ba610f4f213e747f37b167167ba39fe Mon Sep 17 00:00:00 2001 From: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com> Date: Fri, 1 Aug 2025 18:30:45 +0200 Subject: [PATCH 1/2] Use NEON intrinsics in Vec4 dot operation PabloMK7: Changes the Vec4 dot operation to use NEON intrinsics on ARM devices. This function is used every time a triangle is added to the renderer, so it can be considered hot code. The other vector operations are not used as much, so there is no gain to provide NEON operations for them. The improvements from this change are most likely minimal. Co-authored-by: PabloMK7 --- src/common/vector_math.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/common/vector_math.h b/src/common/vector_math.h index b4885835df..a72d033f63 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h @@ -1,9 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Tony Wasserka // SPDX-FileCopyrightText: 2014 Dolphin Emulator Project // SPDX-License-Identifier: BSD-3-Clause AND GPL-2.0-or-later #pragma once +#ifdef __ARM_NEON +#include <arm_neon.h> +#endif + #include #include @@ -641,6 +648,23 @@ template return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; } +template <> +[[nodiscard]] inline float Dot(const Vec4<float>& a, const Vec4<float>& b) { +#ifdef __ARM_NEON + float32x4_t va = vld1q_f32(a.AsArray()); + float32x4_t vb = vld1q_f32(b.AsArray()); + float32x4_t result = vmulq_f32(va, vb); +#if defined(__aarch64__) // Use vaddvq_f32 in ARMv8 architectures + return vaddvq_f32(result); +#else // Use manual addition for older architectures + float32x2_t sum2 = vadd_f32(vget_high_f32(result), vget_low_f32(result)); + return vget_lane_f32(vpadd_f32(sum2, sum2), 0); +#endif +#else + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +#endif +} + template [[nodiscard]] constexpr Vec3 Cross(const Vec3& a, const Vec3& b) { From 
16258e497e5529ef6eeae58c659e441d6590c16b Mon Sep 17 00:00:00 2001 From: Gamer64 <76565986+Gamer64ytb@users.noreply.github.com> Date: Fri, 1 Aug 2025 19:22:15 +0200 Subject: [PATCH 2/2] [microprofile]: Disable microprofile Testing it out to see how it impacts performance. --- src/common/microprofile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/microprofile.h b/src/common/microprofile.h index 56ef0a2dcf..4babcfb0ee 100644 --- a/src/common/microprofile.h +++ b/src/common/microprofile.h @@ -5,7 +5,7 @@ // Uncomment this to disable microprofile. This will get you cleaner profiles when using // external sampling profilers like "Very Sleepy", and will improve performance somewhat. -// #define MICROPROFILE_ENABLED 0 +#define MICROPROFILE_ENABLED 0 // Customized Citra settings. // This file wraps the MicroProfile header so that these are consistent everywhere.