chromium/base/allocator/partition_allocator/src/partition_alloc/partition_alloc_base/time/time_win.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Windows Timer Primer
//
// A good article:  http://www.ddj.com/windows/184416651
// A good mozilla bug:  http://bugzilla.mozilla.org/show_bug.cgi?id=363258
//
// The default windows timer, GetSystemTimeAsFileTime is not very precise.
// It is only good to ~15.5ms.
//
// QueryPerformanceCounter is the logical choice for a high-precision timer.
// However, it is known to be buggy on some hardware.  Specifically, it can
// sometimes "jump".  On laptops, QPC can also be very expensive to call.
// It's 3-4x slower than timeGetTime() on desktops, but can be 10x slower
// on laptops.  A unittest exists which will show the relative cost of various
// timers on any system.
//
// The next logical choice is timeGetTime().  timeGetTime has a precision of
// 1ms, but only if you call APIs (timeBeginPeriod()) which affect all other
// applications on the system.  By default, precision is only 15.5ms.
// Unfortunately, we don't want to call timeBeginPeriod because we don't
// want to affect other applications.  Further, on mobile platforms, use of
// faster multimedia timers can hurt battery life.  See the intel
// article about this here:
// http://softwarecommunity.intel.com/articles/eng/1086.htm
//
// To work around all this, we're going to generally use timeGetTime().  We
// will only increase the system-wide timer if we're not running on battery
// power.

#include "partition_alloc/partition_alloc_base/time/time.h"

#include <windows.foundation.h>
// clang-format off
#include <windows.h> // Must be included before <mmsystem.h>
#include <mmsystem.h>
// clang-format on

#include <atomic>
#include <cstdint>

#include "partition_alloc/build_config.h"
#include "partition_alloc/partition_alloc_base/bit_cast.h"
#include "partition_alloc/partition_alloc_base/check.h"
#include "partition_alloc/partition_alloc_base/cpu.h"
#include "partition_alloc/partition_alloc_base/threading/platform_thread.h"
#include "partition_alloc/partition_alloc_base/time/time_override.h"

namespace partition_alloc::internal::base {

namespace {

// From MSDN, FILETIME "Contains a 64-bit value representing the number of
// 100-nanosecond intervals since January 1, 1601 (UTC)."
int64_t FileTimeToMicroseconds(const FILETIME& ft) {
  // Need to bit_cast to fix alignment, then divide by 10 to convert
  // 100-nanoseconds to microseconds. This only works on little-endian
  // machines.
  return bit_cast<int64_t, FILETIME>(ft) / 10;
}

bool CanConvertToFileTime(int64_t us) {
  return us >= 0 && us <= (std::numeric_limits<int64_t>::max() / 10);
}

FILETIME MicrosecondsToFileTime(int64_t us) {
  PA_BASE_DCHECK(CanConvertToFileTime(us))
      << "Out-of-range: Cannot convert " << us
      << " microseconds to FILETIME units.";

  // Multiply by 10 to convert microseconds to 100-nanoseconds. Bit_cast will
  // handle alignment problems. This only works on little-endian machines.
  return bit_cast<FILETIME, int64_t>(us * 10);
}

int64_t CurrentWallclockMicroseconds() {
  FILETIME ft;
  ::GetSystemTimeAsFileTime(&ft);
  return FileTimeToMicroseconds(ft);
}

// Time between resampling the un-granular clock for this API.
constexpr TimeDelta kMaxTimeToAvoidDrift = Seconds(60);

int64_t g_initial_time = 0;
TimeTicks g_initial_ticks;

void InitializeClock() {
  g_initial_ticks = subtle::TimeTicksNowIgnoringOverride();
  g_initial_time = CurrentWallclockMicroseconds();
}

// Returns the current value of the performance counter.
uint64_t QPCNowRaw() {
  LARGE_INTEGER perf_counter_now = {};
  // According to the MSDN documentation for QueryPerformanceCounter(), this
  // will never fail on systems that run XP or later.
  // https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx
  ::QueryPerformanceCounter(&perf_counter_now);
  return perf_counter_now.QuadPart;
}

}  // namespace

// Time -----------------------------------------------------------------------

namespace subtle {
Time TimeNowIgnoringOverride() {
  if (g_initial_time == 0) {
    InitializeClock();
  }

  // We implement time using the high-resolution timers so that we can get
  // timeouts which are smaller than 10-15ms.  If we just used
  // CurrentWallclockMicroseconds(), we'd have the less-granular timer.
  //
  // To make this work, we initialize the clock (g_initial_time) and the
  // counter (initial_ctr).  To compute the initial time, we can check
  // the number of ticks that have elapsed, and compute the delta.
  //
  // To avoid any drift, we periodically resync the counters to the system
  // clock.
  while (true) {
    TimeTicks ticks = TimeTicksNowIgnoringOverride();

    // Calculate the time elapsed since we started our timer
    TimeDelta elapsed = ticks - g_initial_ticks;

    // Check if enough time has elapsed that we need to resync the clock.
    if (elapsed > kMaxTimeToAvoidDrift) {
      InitializeClock();
      continue;
    }

    return Time() + elapsed + Microseconds(g_initial_time);
  }
}

Time TimeNowFromSystemTimeIgnoringOverride() {
  // Force resync.
  InitializeClock();
  return Time() + Microseconds(g_initial_time);
}
}  // namespace subtle

// static
Time Time::FromFileTime(FILETIME ft) {
  if (bit_cast<int64_t, FILETIME>(ft) == 0) {
    return Time();
  }
  if (ft.dwHighDateTime == std::numeric_limits<DWORD>::max() &&
      ft.dwLowDateTime == std::numeric_limits<DWORD>::max()) {
    return Max();
  }
  return Time(FileTimeToMicroseconds(ft));
}

FILETIME Time::ToFileTime() const {
  if (is_null()) {
    return bit_cast<FILETIME, int64_t>(0);
  }
  if (is_max()) {
    FILETIME result;
    result.dwHighDateTime = std::numeric_limits<DWORD>::max();
    result.dwLowDateTime = std::numeric_limits<DWORD>::max();
    return result;
  }
  return MicrosecondsToFileTime(us_);
}

// TimeTicks ------------------------------------------------------------------

namespace {

// We define a wrapper to adapt between the __stdcall and __cdecl call of the
// mock function, and to avoid a static constructor.  Assigning an import to a
// function pointer directly would require setup code to fetch from the IAT.
DWORD timeGetTimeWrapper() {
  return timeGetTime();
}

DWORD (*g_tick_function)(void) = &timeGetTimeWrapper;

// A structure holding the most significant bits of "last seen" and a
// "rollover" counter.
union LastTimeAndRolloversState {
  // The state as a single 32-bit opaque value.
  std::atomic<int32_t> as_opaque_32{0};

  // The state as usable values.
  struct {
    // The top 8-bits of the "last" time. This is enough to check for rollovers
    // and the small bit-size means fewer CompareAndSwap operations to store
    // changes in state, which in turn makes for fewer retries.
    uint8_t last_8;
    // A count of the number of detected rollovers. Using this as bits 47-32
    // of the upper half of a 64-bit value results in a 48-bit tick counter.
    // This extends the total rollover period from about 49 days to about 8800
    // years while still allowing it to be stored with last_8 in a single
    // 32-bit value.
    uint16_t rollovers;
  } as_values;
};
std::atomic<int32_t> g_last_time_and_rollovers = 0;
static_assert(sizeof(LastTimeAndRolloversState) <=
                  sizeof(g_last_time_and_rollovers),
              "LastTimeAndRolloversState does not fit in a single atomic word");

// We use timeGetTime() to implement TimeTicks::Now().  This can be problematic
// because it returns the number of milliseconds since Windows has started,
// which will roll over the 32-bit value every ~49 days.  We try to track
// rollover ourselves, which works if TimeTicks::Now() is called at least every
// 48.8 days (not 49 days because only changes in the top 8 bits get noticed).
TimeTicks RolloverProtectedNow() {
  LastTimeAndRolloversState state;
  DWORD now;  // DWORD is always unsigned 32 bits.

  while (true) {
    // Fetch the "now" and "last" tick values, updating "last" with "now" and
    // incrementing the "rollovers" counter if the tick-value has wrapped back
    // around. Atomic operations ensure that both "last" and "rollovers" are
    // always updated together.
    int32_t original =
        g_last_time_and_rollovers.load(std::memory_order_acquire);
    state.as_opaque_32 = original;
    now = g_tick_function();
    uint8_t now_8 = static_cast<uint8_t>(now >> 24);
    if (now_8 < state.as_values.last_8) {
      ++state.as_values.rollovers;
    }
    state.as_values.last_8 = now_8;

    // If the state hasn't changed, exit the loop.
    if (state.as_opaque_32 == original) {
      break;
    }

    // Save the changed state. If the existing value is unchanged from the
    // original, exit the loop.
    int32_t check = g_last_time_and_rollovers.compare_exchange_strong(
        original, state.as_opaque_32, std::memory_order_release);
    if (check == original) {
      break;
    }

    // Another thread has done something in between so retry from the top.
  }

  return TimeTicks() +
         Milliseconds(now +
                      (static_cast<uint64_t>(state.as_values.rollovers) << 32));
}

// Discussion of tick counter options on Windows:
//
// (1) CPU cycle counter. (Retrieved via RDTSC)
// The CPU counter provides the highest resolution time stamp and is the least
// expensive to retrieve. However, on older CPUs, two issues can affect its
// reliability: First it is maintained per processor and not synchronized
// between processors. Also, the counters will change frequency due to thermal
// and power changes, and stop in some states.
//
// (2) QueryPerformanceCounter (QPC). The QPC counter provides a high-
// resolution (<1 microsecond) time stamp. On most hardware running today, it
// auto-detects and uses the constant-rate RDTSC counter to provide extremely
// efficient and reliable time stamps.
//
// On older CPUs where RDTSC is unreliable, it falls back to using more
// expensive (20X to 40X more costly) alternate clocks, such as HPET or the ACPI
// PM timer, and can involve system calls; and all this is up to the HAL (with
// some help from ACPI). According to
// http://blogs.msdn.com/oldnewthing/archive/2005/09/02/459952.aspx, in the
// worst case, it gets the counter from the rollover interrupt on the
// programmable interrupt timer. In best cases, the HAL may conclude that the
// RDTSC counter runs at a constant frequency, then it uses that instead. On
// multiprocessor machines, it will try to verify the values returned from
// RDTSC on each processor are consistent with each other, and apply a handful
// of workarounds for known buggy hardware. In other words, QPC is supposed to
// give consistent results on a multiprocessor computer, but for older CPUs it
// can be unreliable due bugs in BIOS or HAL.
//
// (3) System time. The system time provides a low-resolution (from ~1 to ~15.6
// milliseconds) time stamp but is comparatively less expensive to retrieve and
// more reliable. Time::EnableHighResolutionTimer() and
// Time::ActivateHighResolutionTimer() can be called to alter the resolution of
// this timer; and also other Windows applications can alter it, affecting this
// one.

TimeTicks InitialNowFunction();

// See "threading notes" in InitializeNowFunctionPointer() for details on how
// concurrent reads/writes to these globals has been made safe.
std::atomic<TimeTicksNowFunction> g_time_ticks_now_ignoring_override_function{
    &InitialNowFunction};
int64_t g_qpc_ticks_per_second = 0;

TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) {
  // Ensure that the assignment to |g_qpc_ticks_per_second|, made in
  // InitializeNowFunctionPointer(), has happened by this point.
  std::atomic_thread_fence(std::memory_order_acquire);

  PA_BASE_DCHECK(g_qpc_ticks_per_second > 0);

  // If the QPC Value is below the overflow threshold, we proceed with
  // simple multiply and divide.
  if (qpc_value < Time::kQPCOverflowThreshold) {
    return Microseconds(qpc_value * Time::kMicrosecondsPerSecond /
                        g_qpc_ticks_per_second);
  }
  // Otherwise, calculate microseconds in a round about manner to avoid
  // overflow and precision issues.
  int64_t whole_seconds = qpc_value / g_qpc_ticks_per_second;
  int64_t leftover_ticks = qpc_value - (whole_seconds * g_qpc_ticks_per_second);
  return Microseconds((whole_seconds * Time::kMicrosecondsPerSecond) +
                      ((leftover_ticks * Time::kMicrosecondsPerSecond) /
                       g_qpc_ticks_per_second));
}

TimeTicks QPCNow() {
  return TimeTicks() + QPCValueToTimeDelta(QPCNowRaw());
}

void InitializeNowFunctionPointer() {
  LARGE_INTEGER ticks_per_sec = {};
  if (!QueryPerformanceFrequency(&ticks_per_sec)) {
    ticks_per_sec.QuadPart = 0;
  }

  // If Windows cannot provide a QPC implementation, TimeTicks::Now() must use
  // the low-resolution clock.
  //
  // If the QPC implementation is expensive and/or unreliable, TimeTicks::Now()
  // will still use the low-resolution clock. A CPU lacking a non-stop time
  // counter will cause Windows to provide an alternate QPC implementation that
  // works, but is expensive to use.
  //
  // Otherwise, Now uses the high-resolution QPC clock. As of 21 August 2015,
  // ~72% of users fall within this category.
  CPU cpu;
  const TimeTicksNowFunction now_function =
      (ticks_per_sec.QuadPart <= 0 || !cpu.has_non_stop_time_stamp_counter())
          ? &RolloverProtectedNow
          : &QPCNow;

  // Threading note 1: In an unlikely race condition, it's possible for two or
  // more threads to enter InitializeNowFunctionPointer() in parallel. This is
  // not a problem since all threads end up writing out the same values
  // to the global variables, and those variable being atomic are safe to read
  // from other threads.
  //
  // Threading note 2: A release fence is placed here to ensure, from the
  // perspective of other threads using the function pointers, that the
  // assignment to |g_qpc_ticks_per_second| happens before the function pointers
  // are changed.
  g_qpc_ticks_per_second = ticks_per_sec.QuadPart;
  std::atomic_thread_fence(std::memory_order_release);
  // Also set g_time_ticks_now_function to avoid the additional indirection via
  // TimeTicksNowIgnoringOverride() for future calls to TimeTicks::Now(), only
  // if it wasn't already overridden to a different value. memory_order_relaxed
  // is sufficient since an explicit fence was inserted above.
  base::TimeTicksNowFunction initial_time_ticks_now_function =
      &subtle::TimeTicksNowIgnoringOverride;
  internal::g_time_ticks_now_function.compare_exchange_strong(
      initial_time_ticks_now_function, now_function, std::memory_order_relaxed);
  g_time_ticks_now_ignoring_override_function.store(now_function,
                                                    std::memory_order_relaxed);
}

TimeTicks InitialNowFunction() {
  InitializeNowFunctionPointer();
  return g_time_ticks_now_ignoring_override_function.load(
      std::memory_order_relaxed)();
}

}  // namespace

// static
TimeTicks::TickFunctionType TimeTicks::SetMockTickFunction(
    TickFunctionType ticker) {
  TickFunctionType old = g_tick_function;
  g_tick_function = ticker;
  g_last_time_and_rollovers.store(0, std::memory_order_relaxed);
  return old;
}

namespace subtle {
TimeTicks TimeTicksNowIgnoringOverride() {
  return g_time_ticks_now_ignoring_override_function.load(
      std::memory_order_relaxed)();
}
}  // namespace subtle

// static
TimeTicks::Clock TimeTicks::GetClock() {
  return Clock::WIN_ROLLOVER_PROTECTED_TIME_GET_TIME;
}

// ThreadTicks ----------------------------------------------------------------

namespace subtle {
ThreadTicks ThreadTicksNowIgnoringOverride() {
  return ThreadTicks::GetForThread(PlatformThread::CurrentHandle());
}
}  // namespace subtle

// static
ThreadTicks ThreadTicks::GetForThread(
    const PlatformThreadHandle& thread_handle) {
  PA_BASE_DCHECK(IsSupported());

#if PA_BUILDFLAG(PA_ARCH_CPU_ARM64)
  // QueryThreadCycleTime versus TSCTicksPerSecond doesn't have much relation to
  // actual elapsed time on Windows on Arm, because QueryThreadCycleTime is
  // backed by the actual number of CPU cycles executed, rather than a
  // constant-rate timer like Intel. To work around this, use GetThreadTimes
  // (which isn't as accurate but is meaningful as a measure of elapsed
  // per-thread time).
  FILETIME creation_time, exit_time, kernel_time, user_time;
  ::GetThreadTimes(thread_handle.platform_handle(), &creation_time, &exit_time,
                   &kernel_time, &user_time);

  const int64_t us = FileTimeToMicroseconds(user_time);
#else
  // Get the number of TSC ticks used by the current thread.
  ULONG64 thread_cycle_time = 0;
  ::QueryThreadCycleTime(thread_handle.platform_handle(), &thread_cycle_time);

  // Get the frequency of the TSC.
  const double tsc_ticks_per_second = time_internal::TSCTicksPerSecond();
  if (tsc_ticks_per_second == 0) {
    return ThreadTicks();
  }

  // Return the CPU time of the current thread.
  const double thread_time_seconds = thread_cycle_time / tsc_ticks_per_second;
  const int64_t us =
      static_cast<int64_t>(thread_time_seconds * Time::kMicrosecondsPerSecond);
#endif

  return ThreadTicks(us);
}

// static
bool ThreadTicks::IsSupportedWin() {
#if PA_BUILDFLAG(PA_ARCH_CPU_ARM64)
  // The Arm implementation does not use QueryThreadCycleTime and therefore does
  // not care about the time stamp counter.
  return true;
#else
  return time_internal::HasConstantRateTSC();
#endif
}

// static
void ThreadTicks::WaitUntilInitializedWin() {
#if !PA_BUILDFLAG(PA_ARCH_CPU_ARM64)
  while (time_internal::TSCTicksPerSecond() == 0) {
    ::Sleep(10);
  }
#endif
}

// static
TimeTicks TimeTicks::FromQPCValue(LONGLONG qpc_value) {
  return TimeTicks() + QPCValueToTimeDelta(qpc_value);
}

// TimeDelta ------------------------------------------------------------------

// static
TimeDelta TimeDelta::FromQPCValue(LONGLONG qpc_value) {
  return QPCValueToTimeDelta(qpc_value);
}

// static
TimeDelta TimeDelta::FromFileTime(FILETIME ft) {
  return Microseconds(FileTimeToMicroseconds(ft));
}

// static
TimeDelta TimeDelta::FromWinrtDateTime(ABI::Windows::Foundation::DateTime dt) {
  // UniversalTime is 100 ns intervals since January 1, 1601 (UTC)
  return Microseconds(dt.UniversalTime / 10);
}

ABI::Windows::Foundation::DateTime TimeDelta::ToWinrtDateTime() const {
  ABI::Windows::Foundation::DateTime date_time;
  date_time.UniversalTime = InMicroseconds() * 10;
  return date_time;
}

#if !PA_BUILDFLAG(PA_ARCH_CPU_ARM64)
namespace time_internal {

bool HasConstantRateTSC() {
  static bool is_supported = CPU().has_non_stop_time_stamp_counter();
  return is_supported;
}

double TSCTicksPerSecond() {
  PA_BASE_DCHECK(HasConstantRateTSC());
  // The value returned by QueryPerformanceFrequency() cannot be used as the TSC
  // frequency, because there is no guarantee that the TSC frequency is equal to
  // the performance counter frequency.
  // The TSC frequency is cached in a static variable because it takes some time
  // to compute it.
  static double tsc_ticks_per_second = 0;
  if (tsc_ticks_per_second != 0) {
    return tsc_ticks_per_second;
  }

  // Increase the thread priority to reduces the chances of having a context
  // switch during a reading of the TSC and the performance counter.
  const int previous_priority = ::GetThreadPriority(::GetCurrentThread());
  ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST);

  // The first time that this function is called, make an initial reading of the
  // TSC and the performance counter.

  static const uint64_t tsc_initial = __rdtsc();
  static const uint64_t perf_counter_initial = QPCNowRaw();

  // Make a another reading of the TSC and the performance counter every time
  // that this function is called.
  const uint64_t tsc_now = __rdtsc();
  const uint64_t perf_counter_now = QPCNowRaw();

  // Reset the thread priority.
  ::SetThreadPriority(::GetCurrentThread(), previous_priority);

  // Make sure that at least 50 ms elapsed between the 2 readings. The first
  // time that this function is called, we don't expect this to be the case.
  // Note: The longer the elapsed time between the 2 readings is, the more
  //   accurate the computed TSC frequency will be. The 50 ms value was
  //   chosen because local benchmarks show that it allows us to get a
  //   stddev of less than 1 tick/us between multiple runs.
  // Note: According to the MSDN documentation for QueryPerformanceFrequency(),
  //   this will never fail on systems that run XP or later.
  //   https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx
  LARGE_INTEGER perf_counter_frequency = {};
  ::QueryPerformanceFrequency(&perf_counter_frequency);
  PA_BASE_DCHECK(perf_counter_now >= perf_counter_initial);
  const uint64_t perf_counter_ticks = perf_counter_now - perf_counter_initial;
  const double elapsed_time_seconds =
      perf_counter_ticks / static_cast<double>(perf_counter_frequency.QuadPart);

  constexpr double kMinimumEvaluationPeriodSeconds = 0.05;
  if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds) {
    return 0;
  }

  // Compute the frequency of the TSC.
  PA_BASE_DCHECK(tsc_now >= tsc_initial);
  const uint64_t tsc_ticks = tsc_now - tsc_initial;
  tsc_ticks_per_second = tsc_ticks / elapsed_time_seconds;

  return tsc_ticks_per_second;
}

}  // namespace time_internal
#endif  // PA_BUILDFLAG(PA_ARCH_CPU_ARM64)

}  // namespace partition_alloc::internal::base