// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) …

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>
#include <linux/static_call.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/i8259.h>
#include <asm/uv/uv.h>

unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
EXPORT_SYMBOL(…);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(…);

#define KHZ …

/*
 * TSC can be unstable due to cpufreq or due to unsynced TSCs
 */
static int __read_mostly tsc_unstable;
static unsigned int __initdata tsc_early_khz;

static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc);

int tsc_clocksource_reliable;

static int __read_mostly tsc_force_recalibrate;

static struct clocksource_base art_base_clk = …;
static bool have_art;

struct cyc2ns {
	…
}; /* fits one cacheline */

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

static int __init tsc_early_khz_setup(char *buf)
{
	…
}
early_param(…);

__always_inline void __cyc2ns_read(struct cyc2ns_data *data)
{
	…
}

__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
	…
}

__always_inline void cyc2ns_read_end(void)
{
	…
}

/*
 * Accelerators for sched_clock()
 * convert from cycles(64bits) => nanoseconds (64bits)
 * basic equation:
 *		ns = cycles / (freq / ns_per_sec)
 *		ns = cycles * (ns_per_sec / freq)
 *		ns = cycles * (10^9 / (cpu_khz * 10^3))
 *		ns = cycles * (10^6 / cpu_khz)
 *
 * Then we use scaling math (suggested by george@mvista.com) to get:
 *		ns = cycles * (10^6 * SC / cpu_khz) / SC
 *		ns = cycles * cyc2ns_scale / SC
 *
 * And since SC is a constant power of two, we can convert the div
 * into a shift. The larger SC is, the more accurate the conversion, but
 * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication
 * (64-bit result) can be used.
 *
 * We can use a khz divisor instead of mhz to keep better precision.
 * (mathieu.desnoyers@polymtl.ca)
 *
 * See the illustrative sketch after native_sched_clock() below.
 *
 *	-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */

static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
	…
}

static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	…
}

static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	…
}

static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	…
}

/*
 * Initialize cyc2ns for boot cpu
 */
static void __init cyc2ns_init_boot_cpu(void)
{
	…
}

/*
 * Secondary CPUs do not run through tsc_init(), so set up
 * all the scale factors for all CPUs, assuming the same
 * speed as the bootup CPU.
 */
static void __init cyc2ns_init_secondary_cpus(void)
{
	…
}

/*
 * Scheduler clock - returns current time in nanosec units.
 */
noinstr u64 native_sched_clock(void)
{
	…
}
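/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * mult/shift trick from the cyc2ns comment above, in miniature. With a
 * hypothetical SC = 2^10 and a 3000000 kHz (3 GHz) TSC:
 *
 *	scale = (10^6 << 10) / 3000000 = 341	(fits easily in 32 bits)
 *	ns    = (cycles * scale) >> 10		(~ cycles / 3)
 *
 * A minimal version, assuming a constant frequency and ignoring the
 * per-CPU epoch offset that the real cyc2ns data carries. The real code
 * uses mul_u64_u32_shr() so the 64x32 multiply cannot overflow; this
 * toy version does not.
 */
static inline u64 toy_cycles_to_ns(u64 cycles, u32 khz)
{
	u32 scale = (1000000UL << 10) / khz;	/* 10^6 * SC / cpu_khz */

	return (cycles * scale) >> 10;		/* cycles * scale / SC */
}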
/*
 * Generate a sched_clock if you already have a TSC value.
 */
u64 native_sched_clock_from_tsc(u64 tsc)
{
	…
}

/*
 * We need to define a real function for sched_clock, to override the
 * weak default version.
 */
#ifdef CONFIG_PARAVIRT
noinstr u64 sched_clock_noinstr(void)
{
	…
}

bool using_native_sched_clock(void)
{
	…
}
#else
u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock")));

bool using_native_sched_clock(void) { return true; }
#endif

notrace u64 sched_clock(void)
{
	…
}

int check_tsc_unstable(void)
{
	…
}
EXPORT_SYMBOL_GPL(…);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	…
}
#else
/*
 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup(…);

static int no_sched_irq_time;
static int no_tsc_watchdog;
static int tsc_as_watchdog;

static int __init tsc_setup(char *str)
{
	…
}
__setup(…);

#define MAX_RETRIES		…
#define TSC_DEFAULT_THRESHOLD	…

/*
 * Read TSC and the reference counters. Take care of any disturbances
 */
static u64 tsc_read_refs(u64 *p, int hpet)
{
	…
}

/*
 * Calculate the TSC frequency from HPET reference
 */
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	…
}

/*
 * Calculate the TSC frequency from PMTimer reference
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	…
}

#define CAL_MS		…
#define CAL_LATCH	…
#define CAL_PIT_LOOPS	…

#define CAL2_MS		…
#define CAL2_LATCH	…
#define CAL2_PIT_LOOPS	…

/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	…
}

/*
 * This reads the current MSB of the PIT counter, and
 * checks if we are running on sufficiently fast and
 * non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work).
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB value etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    will then consider it a failure when they don't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 */
static inline int pit_verify_msb(unsigned char val)
{
	…
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	…
}
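/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * how a TSC delta observed across a known number of PIT MSB steps
 * converts to a frequency. Each MSB step is 256 PIT ticks at
 * PIT_TICK_RATE (1193182 Hz), so:
 *
 *	tsc_khz = tsc_delta * PIT_TICK_RATE / (msb_steps * 256 * 1000)
 *
 * quick_pit_calibrate() below does the equivalent computation on the
 * deltas gathered by pit_expect_msb().
 */
static inline unsigned long toy_pit_delta_to_khz(u64 tsc_delta, unsigned int msb_steps)
{
	u64 khz = tsc_delta * PIT_TICK_RATE;	/* cycles * Hz */

	do_div(khz, msb_steps * 256 * 1000);	/* / elapsed ticks, scale to kHz */
	return (unsigned long)khz;
}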
/*
 * How many MSB values do we want to see? We aim for
 * a maximum error rate of 500ppm (in practice the
 * real error is much smaller), but refuse to spend
 * more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS		…
#define MAX_QUICK_PIT_ITERATIONS	…

static unsigned long quick_pit_calibrate(void)
{
	…
}

/**
 * native_calibrate_tsc - determine TSC frequency
 *
 * Determine TSC frequency via CPUID, else return 0.
 */
unsigned long native_calibrate_tsc(void)
{
	…
}

static unsigned long cpu_khz_from_cpuid(void)
{
	…
}

/*
 * Calibrate the cpu using the PIT, HPET and PM timer methods. They are
 * available later in boot after ACPI is initialized.
 */
static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
{
	…
}

/**
 * native_calibrate_cpu_early - can calibrate the cpu early in boot
 */
unsigned long native_calibrate_cpu_early(void)
{
	…
}

/**
 * native_calibrate_cpu - calibrate the cpu
 */
static unsigned long native_calibrate_cpu(void)
{
	…
}

void recalibrate_cpu_khz(void)
{
	…
}
EXPORT_SYMBOL_GPL(…);

static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	…
}

/*
 * Even on processors with invariant TSC, the TSC gets reset in some of
 * the ACPI system sleep states. And in some systems the BIOS seems to
 * reinit the TSC to an arbitrary value (still sync'd across CPUs)
 * during resume from such sleep states. To cope with this, recompute
 * the cyc2ns_offset for each cpu so that sched_clock() continues from
 * the point where it was left off during suspend.
 */
void tsc_restore_sched_clock_state(void)
{
	…
}

#ifdef CONFIG_CPU_FREQ
/*
 * Frequency scaling support. Adjust the TSC based timer when the CPU
 * frequency changes.
 *
 * NOTE: On SMP the situation is not fixable in general, so simply mark
 * the TSC as unstable and give up in those cases.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
{
	…
}

static struct notifier_block time_cpufreq_notifier_block = …;

static int __init cpufreq_register_tsc_scaling(void)
{
	…
}

core_initcall(cpufreq_register_tsc_scaling);

#endif /* CONFIG_CPU_FREQ */

#define ART_CPUID_LEAF		…
#define ART_MIN_DENOMINATOR	…

/*
 * If ART is present, detect the numerator:denominator to convert to TSC
 * (see the conversion sketch after tsc_cs_enable() below).
 */
static void __init detect_art(void)
{
	…
}

/* clocksource code */

static void tsc_resume(struct clocksource *cs)
{
	…
}

/*
 * We used to compare the TSC to the cycle_last value in the clocksource
 * structure to avoid a nasty time-warp. This can be observed in a
 * very small window right after one CPU updated cycle_last under
 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
 * is smaller than the cycle_last reference value due to a TSC which
 * is slightly behind. This delta is nowhere else observable, but in
 * that case it results in a forward time jump in the range of hours
 * due to the unsigned delta calculation of the time keeping core
 * code, which is necessary to support wrapping clocksources like pm
 * timer.
 *
 * This sanity check is now done in the core timekeeping code, by
 * checking the result of read_tsc() - cycle_last for being negative.
 * That works because CLOCKSOURCE_MASK(64) does not mask out any bit.
 * See the illustrative sketch after tsc_cs_enable() below.
 */
static u64 read_tsc(struct clocksource *cs)
{
	…
}

static void tsc_cs_mark_unstable(struct clocksource *cs)
{
	…
}

static void tsc_cs_tick_stable(struct clocksource *cs)
{
	…
}

static int tsc_cs_enable(struct clocksource *cs)
{
	…
}
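/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * why the cycle_last sanity check described above read_tsc() works
 * with a full 64-bit mask. A TSC read slightly behind cycle_last
 * yields an unsigned delta with the top bit set, which can be treated
 * as "negative" and clamped instead of being interpreted as a huge
 * forward time jump:
 */
static inline u64 toy_clamped_tsc_delta(u64 now, u64 cycle_last)
{
	u64 delta = now - cycle_last;	/* wraps to ~2^64 if now < cycle_last */

	/* CLOCKSOURCE_MASK(64) keeps all 64 bits, so the sign test is valid */
	return ((s64)delta < 0) ? 0 : delta;
}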
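/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * the numerator:denominator relation that detect_art() above reads
 * from CPUID. An ART timestamp converts to a TSC value as:
 *
 *	tsc = art * numerator / denominator + offset
 *
 * where offset folds in any difference in the counters' start points.
 */
static inline u64 toy_art_to_tsc(u64 art, u32 numerator, u32 denominator, u64 offset)
{
	u64 tmp = art * numerator;	/* may overflow for very large art values */

	do_div(tmp, denominator);
	return tmp + offset;
}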
/*
 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
 */
static struct clocksource clocksource_tsc_early = …;

/*
 * Must mark VALID_FOR_HRES early such that when we unregister tsc_early
 * this one will immediately take over. We will only register if TSC has
 * been found good.
 */
static struct clocksource clocksource_tsc = …;

void mark_tsc_unstable(char *reason)
{
	…
}
EXPORT_SYMBOL_GPL(…);

static void __init tsc_disable_clocksource_watchdog(void)
{
	…
}

bool tsc_clocksource_watchdog_disabled(void)
{
	…
}

static void __init check_system_tsc_reliable(void)
{
	…
}

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
int unsynchronized_tsc(void)
{
	…
}

static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work: ignored.
 *
 * This function uses delayed work over a period of a
 * second to further refine the TSC freq value. Since this is
 * timer based, instead of loop based, we don't block the boot
 * process while this longer calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc),
 * or the refined calibration is off by more than 1% from the fast
 * early calibration, we throw out the new calibration and use the
 * early calibration. (See the acceptance-test sketch at the end of
 * this file.)
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	…
}

static int __init init_tsc_clocksource(void)
{
	…
}

/*
 * We use device_initcall here, to ensure we run after the hpet
 * is fully initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

static bool __init determine_cpu_tsc_frequencies(bool early)
{
	…
}

static unsigned long __init get_loops_per_jiffy(void)
{
	…
}

static void __init tsc_enable_sched_clock(void)
{
	…
}

void __init tsc_early_init(void)
{
	…
}

void __init tsc_init(void)
{
	…
}

#ifdef CONFIG_SMP
/*
 * Check whether existing calibration data can be reused.
 */
unsigned long calibrate_delay_is_known(void)
{
	…
}
#endif
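/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * the "within 1%" acceptance test described above
 * tsc_refine_calibration_work(). The refined value is only adopted
 * when it stays within 1% of the fast early calibration:
 */
static inline bool toy_refined_khz_ok(unsigned long early_khz, unsigned long refined_khz)
{
	unsigned long delta = (refined_khz > early_khz) ?
			      refined_khz - early_khz : early_khz - refined_khz;

	return delta <= early_khz / 100;	/* reject if off by more than 1% */
}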