diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-01-02 02:24:49 +0100 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2021-01-02 04:00:27 +0100 |
commit | 53d92318b82cd4a9e08f814fcb8aab624d795c6c (patch) | |
tree | 4f5236ffebdcf947297d8ace42151b36810f3144 | |
parent | X86/NativeClock: Improve performance of clock calculations on hot path. (diff) | |
download | yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar.gz yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar.bz2 yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar.lz yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar.xz yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.tar.zst yuzu-53d92318b82cd4a9e08f814fcb8aab624d795c6c.zip |
-rw-r--r-- | src/common/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/common/atomic_ops.cpp | 75 | ||||
-rw-r--r-- | src/common/atomic_ops.h | 71 | ||||
-rw-r--r-- | src/common/x64/native_clock.cpp | 41 | ||||
-rw-r--r-- | src/common/x64/native_clock.h | 22 |
5 files changed, 107 insertions, 103 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2c2bd2ee8..abe62543e 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -98,7 +98,6 @@ add_library(common STATIC algorithm.h alignment.h assert.h - atomic_ops.cpp atomic_ops.h detached_tasks.cpp detached_tasks.h diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp deleted file mode 100644 index 1612d0e67..000000000 --- a/src/common/atomic_ops.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <cstring> - -#include "common/atomic_ops.h" - -#if _MSC_VER -#include <intrin.h> -#endif - -namespace Common { - -#if _MSC_VER - -bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { - const u8 result = - _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected); - return result == expected; -} - -bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { - const u16 result = - _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected); - return result == expected; -} - -bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { - const u32 result = - _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected); - return result == expected; -} - -bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { - const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), - value, expected); - return result == expected; -} - -bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { - return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1], - value[0], - reinterpret_cast<__int64*>(expected.data())) != 0; -} - -#else - -bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { - return __sync_bool_compare_and_swap(pointer, expected, value); -} - -bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { - return __sync_bool_compare_and_swap(pointer, expected, value); -} - -bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { - return __sync_bool_compare_and_swap(pointer, expected, value); -} - -bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { - return __sync_bool_compare_and_swap(pointer, expected, value); -} - -bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { - unsigned __int128 value_a; - unsigned __int128 expected_a; - std::memcpy(&value_a, value.data(), sizeof(u128)); - std::memcpy(&expected_a, expected.data(), sizeof(u128)); - return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); -} - -#endif - -} // namespace Common diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h index b46888589..2b1f515e8 100644 --- a/src/common/atomic_ops.h +++ b/src/common/atomic_ops.h @@ -4,14 +4,75 @@ #pragma once +#include <cstring> +#include <memory> + #include "common/common_types.h" +#if _MSC_VER +#include <intrin.h> +#endif + namespace Common { -[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected); -[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected); +#if _MSC_VER + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { + const u8 result = + _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { + const u16 result = + _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { + const u32 result = + _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { + const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), + value, expected); + return result == expected; +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { + return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1], + value[0], + reinterpret_cast<__int64*>(expected.data())) != 0; +} + +#else + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) { + return __sync_bool_compare_and_swap(pointer, expected, value); +} + +[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) { + unsigned __int128 value_a; + unsigned __int128 expected_a; + std::memcpy(&value_a, value.data(), sizeof(u128)); + std::memcpy(&expected_a, expected.data(), sizeof(u128)); + return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); +} + +#endif } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index e246432d0..a65f6b832 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -17,6 +17,7 @@ #include <x86intrin.h> #endif +#include "common/atomic_ops.h" #include "common/uint128.h" #include "common/x64/native_clock.h" @@ -102,8 +103,8 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { _mm_mfence(); - last_measure = __rdtsc(); - accumulated_ticks = 0U; + time_point.inner.last_measure = __rdtsc(); + time_point.inner.accumulated_ticks = 0U; ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); @@ -112,23 +113,35 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen } u64 NativeClock::GetRTSC() { - std::scoped_lock scope{rtsc_serialize}; - _mm_mfence(); - const u64 current_measure = __rdtsc(); - u64 diff = current_measure - last_measure; - diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) - if (current_measure > last_measure) { - last_measure = current_measure; - } - accumulated_ticks += diff; + TimePoint new_time_point{}; + TimePoint current_time_point{}; + do { + current_time_point.pack = time_point.pack; + _mm_mfence(); + const u64 current_measure = __rdtsc(); + u64 diff = current_measure - current_time_point.inner.last_measure; + diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) + new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure + ? current_measure + : current_time_point.inner.last_measure; + new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); /// The clock cannot be more precise than the guest timer, remove the lower bits - return accumulated_ticks & inaccuracy_mask; + return new_time_point.inner.accumulated_ticks & inaccuracy_mask; } void NativeClock::Pause(bool is_paused) { if (!is_paused) { - _mm_mfence(); - last_measure = __rdtsc(); + TimePoint current_time_point{}; + TimePoint new_time_point{}; + do { + current_time_point.pack = time_point.pack; + new_time_point.pack = current_time_point.pack; + _mm_mfence(); + new_time_point.inner.last_measure = __rdtsc(); + } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, + current_time_point.pack)); } } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index a7b1ee9e0..7cbd400d2 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -6,7 +6,6 @@ #include <optional> -#include "common/spin_lock.h" #include "common/wall_clock.h" namespace Common { @@ -32,22 +31,29 @@ public: private: u64 GetRTSC(); + union alignas(16) TimePoint { + TimePoint() : pack{} {} + u128 pack{}; + struct Inner { + u64 last_measure{}; + u64 accumulated_ticks{}; + } inner; + }; + /// value used to reduce the native clocks accuracy as some apss rely on /// undefined behavior where the level of accuracy in the clock shouldn't /// be higher. static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1); - SpinLock rtsc_serialize{}; - u64 last_measure{}; - u64 accumulated_ticks{}; - u64 rtsc_frequency; - + TimePoint time_point; // factors + u64 clock_rtsc_factor{}; + u64 cpu_rtsc_factor{}; u64 ns_rtsc_factor{}; u64 us_rtsc_factor{}; u64 ms_rtsc_factor{}; - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; + + u64 rtsc_frequency; }; } // namespace X64 |