mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2025-01-07 07:46:59 +00:00
x64: cpu_wait: Make use of MWAITX in MicroSleep
MWAITX is AMD's equivalent of UMWAIT, which is available on Intel's Alder Lake CPUs. We can emulate TPAUSE by using MONITORX in conjunction with MWAITX to wait for 100K cycles.
This commit is contained in:
parent
4303ed614d
commit
3d868baaa4
|
@ -13,24 +13,30 @@
|
||||||
|
|
||||||
namespace Common::X64 {
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
|
// For reference:
|
||||||
|
// At 1 GHz, 100K cycles is 100us
|
||||||
|
// At 2 GHz, 100K cycles is 50us
|
||||||
|
// At 4 GHz, 100K cycles is 25us
|
||||||
|
constexpr auto PauseCycles = 100'000U;
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
__forceinline static void TPAUSE() {
|
__forceinline static void TPAUSE() {
|
||||||
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
|
||||||
// For reference:
|
|
||||||
// At 1 GHz, 100K cycles is 100us
|
|
||||||
// At 2 GHz, 100K cycles is 50us
|
|
||||||
// At 4 GHz, 100K cycles is 25us
|
|
||||||
static constexpr auto PauseCycles = 100'000;
|
|
||||||
_tpause(0, FencedRDTSC() + PauseCycles);
|
_tpause(0, FencedRDTSC() + PauseCycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static void MWAITX() {
|
||||||
|
// monitor_var should be aligned to a cache line.
|
||||||
|
alignas(64) u64 monitor_var{};
|
||||||
|
_mm_monitorx(&monitor_var, 0, 0);
|
||||||
|
_mm_mwaitx(/* extensions*/ 2, /* hints */ 0, /* cycles */ PauseCycles);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
static void TPAUSE() {
|
static void TPAUSE() {
|
||||||
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
|
||||||
// For reference:
|
|
||||||
// At 1 GHz, 100K cycles is 100us
|
|
||||||
// At 2 GHz, 100K cycles is 50us
|
|
||||||
// At 4 GHz, 100K cycles is 25us
|
|
||||||
static constexpr auto PauseCycles = 100'000;
|
|
||||||
const auto tsc = FencedRDTSC() + PauseCycles;
|
const auto tsc = FencedRDTSC() + PauseCycles;
|
||||||
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
||||||
const auto edx = static_cast<u32>(tsc >> 32);
|
const auto edx = static_cast<u32>(tsc >> 32);
|
||||||
|
@ -40,9 +46,12 @@ static void TPAUSE() {
|
||||||
|
|
||||||
void MicroSleep() {
|
void MicroSleep() {
|
||||||
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
||||||
|
static const bool has_monitorx = GetCPUCaps().monitorx;
|
||||||
|
|
||||||
if (has_waitpkg) {
|
if (has_waitpkg) {
|
||||||
TPAUSE();
|
TPAUSE();
|
||||||
|
} else if (has_monitorx) {
|
||||||
|
MWAITX();
|
||||||
} else {
|
} else {
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue