diff options
Diffstat (limited to 'src/common/x64/cpu_wait.cpp')
-rw-r--r-- | src/common/x64/cpu_wait.cpp | 70 |
1 files changed, 38 insertions, 32 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,58 +9,64 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { +namespace { + +// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. +// For reference: +// At 1 GHz, 100K cycles is 100us +// At 2 GHz, 100K cycles is 50us +// At 4 GHz, 100K cycles is 25us +constexpr auto PauseCycles = 100'000U; + +} // Anonymous namespace + #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; +__forceinline static void TPAUSE() { + static constexpr auto RequestC02State = 0U; + _tpause(RequestC02State, FencedRDTSC() + PauseCycles); } -__forceinline static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; - _tpause(0, FencedRDTSC() + PauseCycles); +__forceinline static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + _mm_monitorx(&monitor_var, 0, 0); + _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { - // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. - // For reference: - // At 1 GHz, 100K cycles is 100us - // At 2 GHz, 100K cycles is 50us - // At 4 GHz, 100K cycles is 25us - static constexpr auto PauseCycles = 100'000; + static constexpr auto RequestC02State = 0U; const auto tsc = FencedRDTSC() + PauseCycles; const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); const auto edx = static_cast<u32>(tsc >> 32); - asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); + asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); +} + +static void MWAITX() { + static constexpr auto EnableWaitTimeFlag = 1U << 1; + static constexpr auto RequestC1State = 0U; + + // monitor_var should be aligned to a cache line. + alignas(64) u64 monitor_var{}; + asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); + asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); } #endif void MicroSleep() { static const bool has_waitpkg = GetCPUCaps().waitpkg; + static const bool has_monitorx = GetCPUCaps().monitorx; if (has_waitpkg) { TPAUSE(); + } else if (has_monitorx) { + MWAITX(); } else { std::this_thread::yield(); } |