forked from eden-emu/eden
		
	
		
			
				
	
	
		
			152 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* This file is part of the dynarmic project.
 | |
|  * Copyright (c) 2021 MerryMage
 | |
|  * SPDX-License-Identifier: 0BSD
 | |
|  */
 | |
| 
 | |
| #include <catch2/benchmark/catch_benchmark.hpp>
 | |
| #include <catch2/catch_test_macros.hpp>
 | |
| #include <fmt/printf.h>
 | |
| #include <mcl/stdint.hpp>
 | |
| 
 | |
| #include "dynarmic/common/fp/fpcr.h"
 | |
| #include "dynarmic/common/fp/fpsr.h"
 | |
| #include "dynarmic/common/fp/op/FPRSqrtEstimate.h"
 | |
| 
 | |
| extern "C" u32 rsqrt_inaccurate(u32);
 | |
| extern "C" u32 rsqrt_full(u32);
 | |
| extern "C" u32 rsqrt_full_gpr(u32);
 | |
| extern "C" u32 rsqrt_full_nb(u32);
 | |
| extern "C" u32 rsqrt_full_nb2(u32);
 | |
| extern "C" u32 rsqrt_full_nb_gpr(u32);
 | |
| extern "C" u32 rsqrt_newton(u32);
 | |
| extern "C" u32 rsqrt_hack(u32);
 | |
| 
 | |
| using namespace Dynarmic;
 | |
| 
 | |
| extern "C" u32 rsqrt_fallback(u32 value) {
 | |
|     FP::FPCR fpcr;
 | |
|     FP::FPSR fpsr;
 | |
|     return FP::FPRSqrtEstimate(value, fpcr, fpsr);
 | |
| }
 | |
| extern "C" u32 _rsqrt_fallback(u32 value) {
 | |
|     return rsqrt_fallback(value);
 | |
| }
 | |
| 
 | |
| void Test(u32 value) {
 | |
|     FP::FPCR fpcr;
 | |
|     FP::FPSR fpsr;
 | |
| 
 | |
|     const u32 expect = FP::FPRSqrtEstimate(value, fpcr, fpsr);
 | |
|     const u32 full = rsqrt_full(value);
 | |
|     const u32 full_gpr = rsqrt_full_gpr(value);
 | |
|     const u32 newton = rsqrt_newton(value);
 | |
|     const u32 hack = rsqrt_hack(value);
 | |
| 
 | |
|     if (expect != full || expect != full_gpr || expect != newton || expect != hack) {
 | |
|         fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);
 | |
| 
 | |
|         REQUIRE(expect == full);
 | |
|         REQUIRE(expect == full_gpr);
 | |
|         REQUIRE(expect == newton);
 | |
|         REQUIRE(expect == hack);
 | |
|     }
 | |
| }
 | |
| 
 | |
| TEST_CASE("RSqrt Tests", "[fp][.]") {
 | |
|     Test(0x00000000);
 | |
|     Test(0x80000000);
 | |
|     Test(0x7f8b7201);
 | |
|     Test(0x7f800000);
 | |
|     Test(0x7fc00000);
 | |
|     Test(0xff800000);
 | |
|     Test(0xffc00000);
 | |
|     Test(0xff800001);
 | |
| 
 | |
|     for (u64 i = 0; i < 0x1'0000'0000; i++) {
 | |
|         const u32 value = static_cast<u32>(i);
 | |
|         Test(value);
 | |
|     }
 | |
| }
 | |
| 
 | |
| TEST_CASE("Benchmark RSqrt", "[fp][.]") {
 | |
|     BENCHMARK("Inaccurate") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_inaccurate(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Full divss") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_full(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Full divss (GPR)") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_full_gpr(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Full divss (NB)") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_full_nb(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Full divss (NB2)") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_full_nb2(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Full divss (NB + GPR)") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_full_nb_gpr(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("One Newton iteration") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_newton(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Ugly Hack") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_hack(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| 
 | |
|     BENCHMARK("Softfloat") {
 | |
|         u64 total = 0;
 | |
|         for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
 | |
|             const u32 value = static_cast<u32>(i);
 | |
|             total += rsqrt_fallback(value);
 | |
|         }
 | |
|         return total;
 | |
|     };
 | |
| }
 |