// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2009-2010 Intel Corporation * * Authors: * Jesse Barnes <[email protected]> */ /* * Some Intel Ibex Peak based platforms support so-called "intelligent * power sharing", which allows the CPU and GPU to cooperate to maximize * performance within a given TDP (thermal design point). This driver * performs the coordination between the CPU and GPU, monitors thermal and * power statistics in the platform, and initializes power monitoring * hardware. It also provides a few tunables to control behavior. Its * primary purpose is to safely allow CPU and GPU turbo modes to be enabled * by tracking power and thermal budget; secondarily it can boost turbo * performance by allocating more power or thermal budget to the CPU or GPU * based on available headroom and activity. * * The basic algorithm is driven by a 5s moving average of temperature. If * thermal headroom is available, the CPU and/or GPU power clamps may be * adjusted upwards. If we hit the thermal ceiling or a thermal trigger, * we scale back the clamp. Aside from trigger events (when we're critically * close or over our TDP) we don't adjust the clamps more than once every * five seconds. * * The thermal device (device 31, function 6) has a set of registers that * are updated by the ME firmware. The ME should also take the clamp values * written to those registers and write them to the CPU, but we currently * bypass that functionality and write the CPU MSR directly. 
* * UNSUPPORTED: * - dual MCP configs * * TODO: * - handle CPU hotplug * - provide turbo enable/disable api * * Related documents: * - CDI 403777, 403778 - Auburndale EDS vol 1 & 2 * - CDI 401376 - Ibex Peak EDS * - ref 26037, 26641 - IPS BIOS spec * - ref 26489 - Nehalem BIOS writer's guide * - ref 26921 - Ibex Peak BIOS Specification */ #include <linux/debugfs.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/kthread.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/sched.h> #include <linux/sched/loadavg.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/tick.h> #include <linux/timer.h> #include <linux/dmi.h> #include <drm/intel/i915_drm.h> #include <asm/msr.h> #include <asm/processor.h> #include <asm/cpu_device_id.h> #include "intel_ips.h" #include <linux/io-64-nonatomic-lo-hi.h> #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR … /* * Package level MSRs for monitor/control */ #define PLATFORM_INFO … #define PLATFORM_TDP … #define PLATFORM_RATIO … #define IA32_MISC_ENABLE … #define IA32_MISC_TURBO_EN … #define TURBO_POWER_CURRENT_LIMIT … #define TURBO_TDC_OVR_EN … #define TURBO_TDC_MASK … #define TURBO_TDC_SHIFT … #define TURBO_TDP_OVR_EN … #define TURBO_TDP_MASK … /* * Core/thread MSRs for monitoring */ #define IA32_PERF_CTL … #define IA32_PERF_TURBO_DIS … /* * Thermal PCI device regs */ #define THM_CFG_TBAR … #define THM_CFG_TBAR_HI … #define THM_TSIU … #define THM_TSE … #define TSE_EN … #define THM_TSS … #define THM_TSTR … #define THM_TSTTP … #define THM_TSCO … #define THM_TSES … #define THM_TSGPEN … #define TSGPEN_HOT_LOHI … #define TSGPEN_CRIT_LOHI … #define THM_TSPC … #define THM_PPEC … #define THM_CTA … #define THM_PTA … #define PTA_SLOPE_MASK … #define PTA_SLOPE_SHIFT … #define PTA_OFFSET_MASK … #define THM_MGTA … #define MGTA_SLOPE_MASK … #define MGTA_SLOPE_SHIFT … #define MGTA_OFFSET_MASK … #define THM_TRC … #define TRC_CORE2_EN … #define TRC_THM_EN … #define TRC_C6_WAR … 
#define TRC_CORE1_EN … #define TRC_CORE_PWR … #define TRC_PCH_EN … #define TRC_MCH_EN … #define TRC_DIMM4 … #define TRC_DIMM3 … #define TRC_DIMM2 … #define TRC_DIMM1 … #define THM_TES … #define THM_TEN … #define TEN_UPDATE_EN … #define THM_PSC … #define PSC_NTG … #define PSC_NTPC … #define PSC_PP_DEF … #define PSP_PP_PC … #define PSP_PP_BAL … #define PSP_PP_GFX … #define PSP_PBRT … #define THM_CTV1 … #define CTV_TEMP_ERROR … #define CTV_TEMP_MASK … #define CTV_ #define THM_CTV2 … #define THM_CEC … #define THM_AE … #define THM_HTS … #define HTS_PCPL_MASK … #define HTS_PCPL_SHIFT … #define HTS_GPL_MASK … #define HTS_GPL_SHIFT … #define HTS_PP_MASK … #define HTS_PP_SHIFT … #define HTS_PP_DEF … #define HTS_PP_PROC … #define HTS_PP_BAL … #define HTS_PP_GFX … #define HTS_PCTD_DIS … #define HTS_GTD_DIS … #define HTS_PTL_MASK … #define HTS_PTL_SHIFT … #define HTS_NVV … #define THM_HTSHI … #define HTS2_PPL_MASK … #define HTS2_PRST_MASK … #define HTS2_PRST_SHIFT … #define HTS2_PRST_UNLOADED … #define HTS2_PRST_RUNNING … #define HTS2_PRST_TDISOP … #define HTS2_PRST_TDISHT … #define HTS2_PRST_TDISUSR … #define HTS2_PRST_TDISPLAT … #define HTS2_PRST_TDISPM … #define HTS2_PRST_TDISERR … #define THM_PTL … #define THM_MGTV … #define TV_MASK … #define TV_SHIFT … #define THM_PTV … #define PTV_MASK … #define THM_MMGPC … #define THM_MPPC … #define THM_MPCPC … #define THM_TSPIEN … #define TSPIEN_AUX_LOHI … #define TSPIEN_HOT_LOHI … #define TSPIEN_CRIT_LOHI … #define TSPIEN_AUX2_LOHI … #define THM_TSLOCK … #define THM_ATR … #define THM_TOF … #define THM_STS … #define STS_PCPL_MASK … #define STS_PCPL_SHIFT … #define STS_GPL_MASK … #define STS_GPL_SHIFT … #define STS_PP_MASK … #define STS_PP_SHIFT … #define STS_PP_DEF … #define STS_PP_PROC … #define STS_PP_BAL … #define STS_PP_GFX … #define STS_PCTD_DIS … #define STS_GTD_DIS … #define STS_PTL_MASK … #define STS_PTL_SHIFT … #define STS_NVV … #define THM_SEC … #define SEC_ACK … #define THM_TC3 … #define THM_TC1 … #define STS_PPL_MASK … 
#define STS_PPL_SHIFT … #define THM_TC2 … #define THM_DTV … #define THM_ITV … #define ITV_ME_SEQNO_MASK … #define ITV_ME_SEQNO_SHIFT … #define ITV_MCH_TEMP_MASK … #define ITV_MCH_TEMP_SHIFT … #define ITV_PCH_TEMP_MASK … #define thm_readb(off) … #define thm_readw(off) … #define thm_readl(off) … #define thm_readq(off) … #define thm_writeb(off, val) … #define thm_writew(off, val) … #define thm_writel(off, val) … static const int IPS_ADJUST_PERIOD = …; /* ms */ static bool late_i915_load = …; /* For initial average collection */ static const int IPS_SAMPLE_PERIOD = …; /* ms */ static const int IPS_SAMPLE_WINDOW = …; /* 5s moving window of samples */ #define IPS_SAMPLE_COUNT … /* Per-SKU limits */ struct ips_mcp_limits { … }; /* Max temps are -10 degrees C to avoid PROCHOT# */ static struct ips_mcp_limits ips_sv_limits = …; static struct ips_mcp_limits ips_lv_limits = …; static struct ips_mcp_limits ips_ulv_limits = …; struct ips_driver { … }; static bool ips_gpu_turbo_enabled(struct ips_driver *ips); /** * ips_cpu_busy - is CPU busy? * @ips: IPS driver struct * * Check CPU for load to see whether we should increase its thermal budget. * * RETURNS: * True if the CPU could use more power, false otherwise. */ static bool ips_cpu_busy(struct ips_driver *ips) { … } /** * ips_cpu_raise - raise CPU power clamp * @ips: IPS driver struct * * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for * this platform. * * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as * long as we haven't hit the TDP limit for the SKU). */ static void ips_cpu_raise(struct ips_driver *ips) { … } /** * ips_cpu_lower - lower CPU power clamp * @ips: IPS driver struct * * Lower CPU power clamp by %IPS_CPU_STEP if possible. * * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going * as low as the platform limits will allow (though we could go lower, there * wouldn't be much point).
*/ static void ips_cpu_lower(struct ips_driver *ips) { … } /** * do_enable_cpu_turbo - internal turbo enable function * @data: unused * * Internal function for actually updating MSRs. When we enable/disable * turbo, we need to do it on each CPU; this function is the one called * by on_each_cpu() when needed. */ static void do_enable_cpu_turbo(void *data) { … } /** * ips_enable_cpu_turbo - enable turbo mode on all CPUs * @ips: IPS driver struct * * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on * all logical threads. */ static void ips_enable_cpu_turbo(struct ips_driver *ips) { … } /** * do_disable_cpu_turbo - internal turbo disable function * @data: unused * * Internal function for actually updating MSRs. When we enable/disable * turbo, we need to do it on each CPU; this function is the one called * by on_each_cpu() when needed. */ static void do_disable_cpu_turbo(void *data) { … } /** * ips_disable_cpu_turbo - disable turbo mode on all CPUs * @ips: IPS driver struct * * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on * all logical threads. */ static void ips_disable_cpu_turbo(struct ips_driver *ips) { … } /** * ips_gpu_busy - is GPU busy? * @ips: IPS driver struct * * Check GPU for load to see whether we should increase its thermal budget. * We need to call into the i915 driver in this case. * * RETURNS: * True if the GPU could use more power, false otherwise. */ static bool ips_gpu_busy(struct ips_driver *ips) { … } /** * ips_gpu_raise - raise GPU power clamp * @ips: IPS driver struct * * Raise the GPU frequency/power if possible. We need to call into the * i915 driver in this case. */ static void ips_gpu_raise(struct ips_driver *ips) { … } /** * ips_gpu_lower - lower GPU power clamp * @ips: IPS driver struct * * Lower GPU frequency/power if possible. Need to call i915. 
*/ static void ips_gpu_lower(struct ips_driver *ips) { … } /** * ips_enable_gpu_turbo - notify the gfx driver turbo is available * @ips: IPS driver struct * * Call into the graphics driver indicating that it can safely use * turbo mode. */ static void ips_enable_gpu_turbo(struct ips_driver *ips) { … } /** * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode * @ips: IPS driver struct * * Request that the graphics driver disable turbo mode. */ static void ips_disable_gpu_turbo(struct ips_driver *ips) { … } /** * mcp_exceeded - check whether we're outside our thermal & power limits * @ips: IPS driver struct * * Check whether the MCP is over its thermal or power budget. * * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mcp_exceeded(struct ips_driver *ips) { … } /** * cpu_exceeded - check whether a CPU core is outside its limits * @ips: IPS driver struct * @cpu: CPU number to check * * Check a given CPU's average temp or power is over its limit. * * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool cpu_exceeded(struct ips_driver *ips, int cpu) { … } /** * mch_exceeded - check whether the GPU is over budget * @ips: IPS driver struct * * Check the MCH temp & power against their maximums. * * Returns: %true if the temp or power has exceeded its maximum, else %false */ static bool mch_exceeded(struct ips_driver *ips) { … } /** * verify_limits - verify BIOS provided limits * @ips: IPS structure * * BIOS can optionally provide non-default limits for power and temp. Check * them here and use the defaults if the BIOS values are not provided or * are otherwise unusable. */ static void verify_limits(struct ips_driver *ips) { … } /** * update_turbo_limits - get various limits & settings from regs * @ips: IPS driver struct * * Update the IPS power & temp limits, along with turbo enable flags, * based on latest register contents. 
* * Used at init time and for runtime BIOS support, which requires polling * the regs for updates (as a result of AC->DC transition for example). * * LOCKING: * Caller must hold turbo_status_lock (outside of init) */ static void update_turbo_limits(struct ips_driver *ips) { … } /** * ips_adjust - adjust power clamp based on thermal state * @data: ips driver structure * * Wake up every 5s or so and check whether we should adjust the power clamp. * Check CPU and GPU load to determine which needs adjustment. There are * several things to consider here: * - do we need to adjust up or down? * - is CPU busy? * - is GPU busy? * - is CPU in turbo? * - is GPU in turbo? * - is CPU or GPU preferred? (CPU is default) * * So, given the above, we do the following: * - up (TDP available) * - CPU not busy, GPU not busy - nothing * - CPU busy, GPU not busy - adjust CPU up * - CPU not busy, GPU busy - adjust GPU up * - CPU busy, GPU busy - adjust preferred unit up, taking headroom from * non-preferred unit if necessary * - down (at TDP limit) * - adjust both CPU and GPU down if possible * * |cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu- * cpu < gpu < |cpu+gpu+ cpu+ gpu+ nothing * cpu < gpu >= |cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu- * cpu >= gpu < |cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu- * cpu >= gpu >=|cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu- * * Returns: %0 */ static int ips_adjust(void *data) { … } /* * Helpers for reading out temp/power values and calculating their * averages for the decision making and monitoring functions. 
*/ static u16 calc_avg_temp(struct ips_driver *ips, u16 *array) { … } static u16 read_mgtv(struct ips_driver *ips) { … } static u16 read_ptv(struct ips_driver *ips) { … } static u16 read_ctv(struct ips_driver *ips, int cpu) { … } static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period) { … } static const u16 temp_decay_factor = …; static u16 update_average_temp(u16 avg, u16 val) { … } static const u16 power_decay_factor = …; static u16 update_average_power(u32 avg, u32 val) { … } static u32 calc_avg_power(struct ips_driver *ips, u32 *array) { … } static void monitor_timeout(struct timer_list *t) { … } /** * ips_monitor - temp/power monitoring thread * @data: ips driver structure * * This is the main function for the IPS driver. It monitors power and * temperature in the MCP and adjusts CPU and GPU power clamps accordingly. * * We keep a 5s moving average of power consumption and temperature. Using * that data, along with CPU vs GPU preference, we adjust the power clamps * up or down. * * Returns: %0 on success or -errno on error */ static int ips_monitor(void *data) { … } /** * ips_irq_handler - handle temperature triggers and other IPS events * @irq: irq number * @arg: unused * * Handle temperature limit trigger events, generally by lowering the clamps. * If we're at a critical limit, we clamp back to the lowest possible value * to prevent emergency shutdown. 
* * Returns: IRQ_NONE or IRQ_HANDLED */ static irqreturn_t ips_irq_handler(int irq, void *arg) { … } #ifndef CONFIG_DEBUG_FS static void ips_debugfs_init(struct ips_driver *ips) { return; } static void ips_debugfs_cleanup(struct ips_driver *ips) { return; } #else /* Expose current state and limits in debugfs if possible */ static int cpu_temp_show(struct seq_file *m, void *data) { … } DEFINE_SHOW_ATTRIBUTE(…); static int cpu_power_show(struct seq_file *m, void *data) { … } DEFINE_SHOW_ATTRIBUTE(…); static int cpu_clamp_show(struct seq_file *m, void *data) { … } DEFINE_SHOW_ATTRIBUTE(…); static int mch_temp_show(struct seq_file *m, void *data) { … } DEFINE_SHOW_ATTRIBUTE(…); static int mch_power_show(struct seq_file *m, void *data) { … } DEFINE_SHOW_ATTRIBUTE(…); static void ips_debugfs_cleanup(struct ips_driver *ips) { … } static void ips_debugfs_init(struct ips_driver *ips) { … } #endif /* CONFIG_DEBUG_FS */ /** * ips_detect_cpu - detect whether CPU supports IPS * @ips: IPS driver struct * * Walk our list and see if we're on a supported CPU. If we find one, * return the limits for it. * * Returns: the &ips_mcp_limits struct that matches the boot CPU or %NULL */ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) { … } /** * ips_get_i915_syms - try to get GPU control methods from i915 driver * @ips: IPS driver * * The i915 driver exports several interfaces to allow the IPS driver to * monitor and control graphics turbo mode. If we can find them, we can * enable graphics turbo, otherwise we must disable it to avoid exceeding * thermal and power limits in the MCP. 
* * Returns: %true if the required symbols are found, else %false */ static bool ips_get_i915_syms(struct ips_driver *ips) { … } static bool ips_gpu_turbo_enabled(struct ips_driver *ips) { … } void ips_link_to_i915_driver(void) { … } EXPORT_SYMBOL_GPL(…); static const struct pci_device_id ips_id_table[] = …; MODULE_DEVICE_TABLE(pci, ips_id_table); static int ips_blacklist_callback(const struct dmi_system_id *id) { … } static const struct dmi_system_id ips_blacklist[] = …; static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) { … } static void ips_remove(struct pci_dev *dev) { … } static struct pci_driver ips_pci_driver = …; module_pci_driver(…) …; MODULE_LICENSE(…) …; MODULE_AUTHOR(…) …; MODULE_DESCRIPTION(…) …;