forked from eden-emu/eden
		
	Merge pull request #10935 from Morph1984/mwaitx
x64: Make use of monitorx instructions for power-efficient sleeps (AMD)
This commit is contained in:
		
						commit
						5e70db0d43
					
				
					 4 changed files with 41 additions and 14 deletions
				
			
		|  | @ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { | ||||||
|     add_field("CPU_Extension_x64_GFNI", caps.gfni); |     add_field("CPU_Extension_x64_GFNI", caps.gfni); | ||||||
|     add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); |     add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); | ||||||
|     add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); |     add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); | ||||||
|  |     add_field("CPU_Extension_x64_MONITORX", caps.monitorx); | ||||||
|     add_field("CPU_Extension_x64_MOVBE", caps.movbe); |     add_field("CPU_Extension_x64_MOVBE", caps.movbe); | ||||||
|     add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); |     add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); | ||||||
|     add_field("CPU_Extension_x64_POPCNT", caps.popcnt); |     add_field("CPU_Extension_x64_POPCNT", caps.popcnt); | ||||||
|  |  | ||||||
|  | @ -168,6 +168,7 @@ static CPUCaps Detect() { | ||||||
|         __cpuid(cpu_id, 0x80000001); |         __cpuid(cpu_id, 0x80000001); | ||||||
|         caps.lzcnt = Common::Bit<5>(cpu_id[2]); |         caps.lzcnt = Common::Bit<5>(cpu_id[2]); | ||||||
|         caps.fma4 = Common::Bit<16>(cpu_id[2]); |         caps.fma4 = Common::Bit<16>(cpu_id[2]); | ||||||
|  |         caps.monitorx = Common::Bit<29>(cpu_id[2]); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (max_ex_fn >= 0x80000007) { |     if (max_ex_fn >= 0x80000007) { | ||||||
|  |  | ||||||
|  | @ -63,6 +63,7 @@ struct CPUCaps { | ||||||
|     bool gfni : 1; |     bool gfni : 1; | ||||||
|     bool invariant_tsc : 1; |     bool invariant_tsc : 1; | ||||||
|     bool lzcnt : 1; |     bool lzcnt : 1; | ||||||
|  |     bool monitorx : 1; | ||||||
|     bool movbe : 1; |     bool movbe : 1; | ||||||
|     bool pclmulqdq : 1; |     bool pclmulqdq : 1; | ||||||
|     bool popcnt : 1; |     bool popcnt : 1; | ||||||
|  |  | ||||||
|  | @ -13,36 +13,60 @@ | ||||||
| 
 | 
 | ||||||
| namespace Common::X64 { | namespace Common::X64 { | ||||||
| 
 | 
 | ||||||
|  | namespace { | ||||||
|  | 
 | ||||||
|  | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
 | ||||||
|  | // For reference:
 | ||||||
|  | // At 1 GHz, 100K cycles is 100us
 | ||||||
|  | // At 2 GHz, 100K cycles is 50us
 | ||||||
|  | // At 4 GHz, 100K cycles is 25us
 | ||||||
|  | constexpr auto PauseCycles = 100'000U; | ||||||
|  | 
 | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
| #ifdef _MSC_VER | #ifdef _MSC_VER | ||||||
| __forceinline static void TPAUSE() { | __forceinline static void TPAUSE() { | ||||||
|     // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
 |     static constexpr auto RequestC02State = 0U; | ||||||
|     // For reference:
 |     _tpause(RequestC02State, FencedRDTSC() + PauseCycles); | ||||||
|     // At 1 GHz, 100K cycles is 100us
 | } | ||||||
|     // At 2 GHz, 100K cycles is 50us
 | 
 | ||||||
|     // At 4 GHz, 100K cycles is 25us
 | __forceinline static void MWAITX() { | ||||||
|     static constexpr auto PauseCycles = 100'000; |     static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||||||
|     _tpause(0, FencedRDTSC() + PauseCycles); |     static constexpr auto RequestC1State = 0U; | ||||||
|  | 
 | ||||||
|  |     // monitor_var should be aligned to a cache line.
 | ||||||
|  |     alignas(64) u64 monitor_var{}; | ||||||
|  |     _mm_monitorx(&monitor_var, 0, 0); | ||||||
|  |     _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); | ||||||
| } | } | ||||||
| #else | #else | ||||||
| static void TPAUSE() { | static void TPAUSE() { | ||||||
|     // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
 |     static constexpr auto RequestC02State = 0U; | ||||||
|     // For reference:
 |  | ||||||
|     // At 1 GHz, 100K cycles is 100us
 |  | ||||||
|     // At 2 GHz, 100K cycles is 50us
 |  | ||||||
|     // At 4 GHz, 100K cycles is 25us
 |  | ||||||
|     static constexpr auto PauseCycles = 100'000; |  | ||||||
|     const auto tsc = FencedRDTSC() + PauseCycles; |     const auto tsc = FencedRDTSC() + PauseCycles; | ||||||
|     const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |     const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | ||||||
|     const auto edx = static_cast<u32>(tsc >> 32); |     const auto edx = static_cast<u32>(tsc >> 32); | ||||||
|     asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); |     asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static void MWAITX() { | ||||||
|  |     static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||||||
|  |     static constexpr auto RequestC1State = 0U; | ||||||
|  | 
 | ||||||
|  |     // monitor_var should be aligned to a cache line.
 | ||||||
|  |     alignas(64) u64 monitor_var{}; | ||||||
|  |     asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||||||
|  |     asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
| void MicroSleep() { | void MicroSleep() { | ||||||
|     static const bool has_waitpkg = GetCPUCaps().waitpkg; |     static const bool has_waitpkg = GetCPUCaps().waitpkg; | ||||||
|  |     static const bool has_monitorx = GetCPUCaps().monitorx; | ||||||
| 
 | 
 | ||||||
|     if (has_waitpkg) { |     if (has_waitpkg) { | ||||||
|         TPAUSE(); |         TPAUSE(); | ||||||
|  |     } else if (has_monitorx) { | ||||||
|  |         MWAITX(); | ||||||
|     } else { |     } else { | ||||||
|         std::this_thread::yield(); |         std::this_thread::yield(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 liamwhite
						liamwhite