chromium/third_party/perfetto/src/base/time.cc

/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <atomic>

#include "perfetto/base/time.h"

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/string_utils.h"

#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
#include <Windows.h>
#else
#include <unistd.h>
#endif

namespace perfetto {
namespace base {

#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
#if !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
namespace {

// Returns the current value of the performance counter.
int64_t QPCNowRaw() {
  LARGE_INTEGER perf_counter_now = {};
  // According to the MSDN documentation for QueryPerformanceCounter(), this
  // will never fail on systems that run XP or later.
  // https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx
  ::QueryPerformanceCounter(&perf_counter_now);
  return perf_counter_now.QuadPart;
}

double TSCTicksPerSecond() {
  // The value returned by QueryPerformanceFrequency() cannot be used as the TSC
  // frequency, because there is no guarantee that the TSC frequency is equal to
  // the performance counter frequency.
  // The TSC frequency is cached in a static variable because it takes some time
  // to compute it.
  static std::atomic<double> tsc_ticks_per_second = 0;
  double value = tsc_ticks_per_second.load(std::memory_order_relaxed);
  if (value != 0)
    return value;

  // Increase the thread priority to reduces the chances of having a context
  // switch during a reading of the TSC and the performance counter.
  const int previous_priority = ::GetThreadPriority(::GetCurrentThread());
  ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST);

  // The first time that this function is called, make an initial reading of the
  // TSC and the performance counter. Initialization of static variable is
  // thread-safe. Threads can race initializing tsc_initial vs
  // perf_counter_initial, although they should be storing very similar values.

  static const uint64_t tsc_initial = __rdtsc();
  static const int64_t perf_counter_initial = QPCNowRaw();

  // Make a another reading of the TSC and the performance counter every time
  // that this function is called.
  const uint64_t tsc_now = __rdtsc();
  const int64_t perf_counter_now = QPCNowRaw();

  // Reset the thread priority.
  ::SetThreadPriority(::GetCurrentThread(), previous_priority);

  // Make sure that at least 50 ms elapsed between the 2 readings. The first
  // time that this function is called, we don't expect this to be the case.
  // Note: The longer the elapsed time between the 2 readings is, the more
  //   accurate the computed TSC frequency will be. The 50 ms value was
  //   chosen because local benchmarks show that it allows us to get a
  //   stddev of less than 1 tick/us between multiple runs.
  // Note: According to the MSDN documentation for QueryPerformanceFrequency(),
  //   this will never fail on systems that run XP or later.
  //   https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx
  LARGE_INTEGER perf_counter_frequency = {};
  ::QueryPerformanceFrequency(&perf_counter_frequency);
  PERFETTO_CHECK(perf_counter_now >= perf_counter_initial);
  const int64_t perf_counter_ticks = perf_counter_now - perf_counter_initial;
  const double elapsed_time_seconds =
      static_cast<double>(perf_counter_ticks) /
      static_cast<double>(perf_counter_frequency.QuadPart);

  constexpr double kMinimumEvaluationPeriodSeconds = 0.05;
  if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds)
    return 0;

  // Compute the frequency of the TSC.
  PERFETTO_CHECK(tsc_now >= tsc_initial);
  const uint64_t tsc_ticks = tsc_now - tsc_initial;
  // Racing with another thread to write |tsc_ticks_per_second| is benign
  // because both threads will write a valid result.
  tsc_ticks_per_second.store(
      static_cast<double>(tsc_ticks) / elapsed_time_seconds,
      std::memory_order_relaxed);

  return tsc_ticks_per_second.load(std::memory_order_relaxed);
}

}  // namespace
#endif  // !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)

TimeNanos GetWallTimeNs() {
  LARGE_INTEGER freq;
  ::QueryPerformanceFrequency(&freq);
  LARGE_INTEGER counter;
  ::QueryPerformanceCounter(&counter);
  double elapsed_nanoseconds = (1e9 * static_cast<double>(counter.QuadPart)) /
                               static_cast<double>(freq.QuadPart);
  return TimeNanos(static_cast<uint64_t>(elapsed_nanoseconds));
}

TimeNanos GetThreadCPUTimeNs() {
#if PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
  // QueryThreadCycleTime versus TSCTicksPerSecond doesn't have much relation to
  // actual elapsed time on Windows on Arm, because QueryThreadCycleTime is
  // backed by the actual number of CPU cycles executed, rather than a
  // constant-rate timer like Intel. To work around this, use GetThreadTimes
  // (which isn't as accurate but is meaningful as a measure of elapsed
  // per-thread time).
  FILETIME dummy, kernel_ftime, user_ftime;
  ::GetThreadTimes(GetCurrentThread(), &dummy, &dummy, &kernel_ftime,
                   &user_ftime);
  uint64_t kernel_time =
      kernel_ftime.dwHighDateTime * 0x100000000 + kernel_ftime.dwLowDateTime;
  uint64_t user_time =
      user_ftime.dwHighDateTime * 0x100000000 + user_ftime.dwLowDateTime;

  return TimeNanos((kernel_time + user_time) * 100);
#else   // !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
  // Get the number of TSC ticks used by the current thread.
  ULONG64 thread_cycle_time = 0;
  ::QueryThreadCycleTime(GetCurrentThread(), &thread_cycle_time);

  // Get the frequency of the TSC.
  const double tsc_ticks_per_second = TSCTicksPerSecond();
  if (tsc_ticks_per_second == 0)
    return TimeNanos();

  // Return the CPU time of the current thread.
  const double thread_time_seconds =
      static_cast<double>(thread_cycle_time) / tsc_ticks_per_second;
  constexpr int64_t kNanosecondsPerSecond = 1000 * 1000 * 1000;
  return TimeNanos(
      static_cast<int64_t>(thread_time_seconds * kNanosecondsPerSecond));
#endif  // !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
}

void SleepMicroseconds(unsigned interval_us) {
  // The Windows Sleep function takes a millisecond count. Round up so that
  // short sleeps don't turn into a busy wait. Note that the sleep granularity
  // on Windows can dynamically vary from 1 ms to ~16 ms, so don't count on this
  // being a short sleep.
  ::Sleep(static_cast<DWORD>((interval_us + 999) / 1000));
}

void InitializeTime() {
#if !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
  // Make an early first call to TSCTicksPerSecond() to start 50 ms elapsed time
  // (see comment in TSCTicksPerSecond()).
  TSCTicksPerSecond();
#endif  // !PERFETTO_BUILDFLAG(PERFETTO_ARCH_CPU_ARM64)
}

#else  // PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)

void SleepMicroseconds(unsigned interval_us) {}

void InitializeTime() {}

#endif  // PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)

std::string GetTimeFmt(const std::string& fmt) {}

std::optional<int32_t> GetTimezoneOffsetMins() {}

}  // namespace base
}  // namespace perfetto