linux/drivers/platform/x86/intel_ips.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2009-2010 Intel Corporation
 *
 * Authors:
 *	Jesse Barnes <[email protected]>
 */

/*
 * Some Intel Ibex Peak based platforms support so-called "intelligent
 * power sharing", which allows the CPU and GPU to cooperate to maximize
 * performance within a given TDP (thermal design point).  This driver
 * performs the coordination between the CPU and GPU, monitors thermal and
 * power statistics in the platform, and initializes power monitoring
 * hardware.  It also provides a few tunables to control behavior.  Its
 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
 * by tracking power and thermal budget; secondarily it can boost turbo
 * performance by allocating more power or thermal budget to the CPU or GPU
 * based on available headroom and activity.
 *
 * The basic algorithm is driven by a 5s moving average of temperature.  If
 * thermal headroom is available, the CPU and/or GPU power clamps may be
 * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
 * we scale back the clamp.  Aside from trigger events (when we're critically
 * close or over our TDP) we don't adjust the clamps more than once every
 * five seconds.
 *
 * The thermal device (device 31, function 6) has a set of registers that
 * are updated by the ME firmware.  The ME should also take the clamp values
 * written to those registers and write them to the CPU, but we currently
 * bypass that functionality and write the CPU MSR directly.
 *
 * UNSUPPORTED:
 *   - dual MCP configs
 *
 * TODO:
 *   - handle CPU hotplug
 *   - provide turbo enable/disable api
 *
 * Related documents:
 *   - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
 *   - CDI 401376 - Ibex Peak EDS
 *   - ref 26037, 26641 - IPS BIOS spec
 *   - ref 26489 - Nehalem BIOS writer's guide
 *   - ref 26921 - Ibex Peak BIOS Specification
 */

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <linux/dmi.h>
#include <drm/intel/i915_drm.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
#include "intel_ips.h"

#include <linux/io-64-nonatomic-lo-hi.h>

#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR

/*
 * Package level MSRs for monitor/control
 */
#define PLATFORM_INFO
#define PLATFORM_TDP
#define PLATFORM_RATIO

#define IA32_MISC_ENABLE
#define IA32_MISC_TURBO_EN

#define TURBO_POWER_CURRENT_LIMIT
#define TURBO_TDC_OVR_EN
#define TURBO_TDC_MASK
#define TURBO_TDC_SHIFT
#define TURBO_TDP_OVR_EN
#define TURBO_TDP_MASK

/*
 * Core/thread MSRs for monitoring
 */
#define IA32_PERF_CTL
#define IA32_PERF_TURBO_DIS

/*
 * Thermal PCI device regs
 */
#define THM_CFG_TBAR
#define THM_CFG_TBAR_HI

#define THM_TSIU
#define THM_TSE
#define TSE_EN
#define THM_TSS
#define THM_TSTR
#define THM_TSTTP
#define THM_TSCO
#define THM_TSES
#define THM_TSGPEN
#define TSGPEN_HOT_LOHI
#define TSGPEN_CRIT_LOHI
#define THM_TSPC
#define THM_PPEC
#define THM_CTA
#define THM_PTA
#define PTA_SLOPE_MASK
#define PTA_SLOPE_SHIFT
#define PTA_OFFSET_MASK
#define THM_MGTA
#define MGTA_SLOPE_MASK
#define MGTA_SLOPE_SHIFT
#define MGTA_OFFSET_MASK
#define THM_TRC
#define TRC_CORE2_EN
#define TRC_THM_EN
#define TRC_C6_WAR
#define TRC_CORE1_EN
#define TRC_CORE_PWR
#define TRC_PCH_EN
#define TRC_MCH_EN
#define TRC_DIMM4
#define TRC_DIMM3
#define TRC_DIMM2
#define TRC_DIMM1
#define THM_TES
#define THM_TEN
#define TEN_UPDATE_EN
#define THM_PSC
#define PSC_NTG
#define PSC_NTPC
#define PSC_PP_DEF
#define PSP_PP_PC
#define PSP_PP_BAL
#define PSP_PP_GFX
#define PSP_PBRT
#define THM_CTV1
#define CTV_TEMP_ERROR
#define CTV_TEMP_MASK
#define CTV_
#define THM_CTV2
#define THM_CEC
#define THM_AE
#define THM_HTS
#define HTS_PCPL_MASK
#define HTS_PCPL_SHIFT
#define HTS_GPL_MASK
#define HTS_GPL_SHIFT
#define HTS_PP_MASK
#define HTS_PP_SHIFT
#define HTS_PP_DEF
#define HTS_PP_PROC
#define HTS_PP_BAL
#define HTS_PP_GFX
#define HTS_PCTD_DIS
#define HTS_GTD_DIS
#define HTS_PTL_MASK
#define HTS_PTL_SHIFT
#define HTS_NVV
#define THM_HTSHI
#define HTS2_PPL_MASK
#define HTS2_PRST_MASK
#define HTS2_PRST_SHIFT
#define HTS2_PRST_UNLOADED
#define HTS2_PRST_RUNNING
#define HTS2_PRST_TDISOP
#define HTS2_PRST_TDISHT
#define HTS2_PRST_TDISUSR
#define HTS2_PRST_TDISPLAT
#define HTS2_PRST_TDISPM
#define HTS2_PRST_TDISERR
#define THM_PTL
#define THM_MGTV
#define TV_MASK
#define TV_SHIFT
#define THM_PTV
#define PTV_MASK
#define THM_MMGPC
#define THM_MPPC
#define THM_MPCPC
#define THM_TSPIEN
#define TSPIEN_AUX_LOHI
#define TSPIEN_HOT_LOHI
#define TSPIEN_CRIT_LOHI
#define TSPIEN_AUX2_LOHI
#define THM_TSLOCK
#define THM_ATR
#define THM_TOF
#define THM_STS
#define STS_PCPL_MASK
#define STS_PCPL_SHIFT
#define STS_GPL_MASK
#define STS_GPL_SHIFT
#define STS_PP_MASK
#define STS_PP_SHIFT
#define STS_PP_DEF
#define STS_PP_PROC
#define STS_PP_BAL
#define STS_PP_GFX
#define STS_PCTD_DIS
#define STS_GTD_DIS
#define STS_PTL_MASK
#define STS_PTL_SHIFT
#define STS_NVV
#define THM_SEC
#define SEC_ACK
#define THM_TC3
#define THM_TC1
#define STS_PPL_MASK
#define STS_PPL_SHIFT
#define THM_TC2
#define THM_DTV
#define THM_ITV
#define ITV_ME_SEQNO_MASK
#define ITV_ME_SEQNO_SHIFT
#define ITV_MCH_TEMP_MASK
#define ITV_MCH_TEMP_SHIFT
#define ITV_PCH_TEMP_MASK

#define thm_readb(off)
#define thm_readw(off)
#define thm_readl(off)
#define thm_readq(off)

#define thm_writeb(off, val)
#define thm_writew(off, val)
#define thm_writel(off, val)

static const int IPS_ADJUST_PERIOD =; /* ms */
static bool late_i915_load =;

/* For initial average collection */
static const int IPS_SAMPLE_PERIOD =; /* ms */
static const int IPS_SAMPLE_WINDOW =; /* 5s moving window of samples */
#define IPS_SAMPLE_COUNT

/* Per-SKU limits */
struct ips_mcp_limits {};

/* Max temps are -10 degrees C to avoid PROCHOT# */

static struct ips_mcp_limits ips_sv_limits =;

static struct ips_mcp_limits ips_lv_limits =;

static struct ips_mcp_limits ips_ulv_limits =;

struct ips_driver {};

static bool
ips_gpu_turbo_enabled(struct ips_driver *ips);

/**
 * ips_cpu_busy - is CPU busy?
 * @ips: IPS driver struct
 *
 * Check CPU for load to see whether we should increase its thermal budget.
 *
 * RETURNS:
 * True if the CPU could use more power, false otherwise.
 */
static bool ips_cpu_busy(struct ips_driver *ips)
{}

/**
 * ips_cpu_raise - raise CPU power clamp
 * @ips: IPS driver struct
 *
 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
 * this platform.
 *
 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
 * long as we haven't hit the TDP limit for the SKU).
 */
static void ips_cpu_raise(struct ips_driver *ips)
{}

/**
 * ips_cpu_lower - lower CPU power clamp
 * @ips: IPS driver struct
 *
 * Lower CPU power clamp by %IPS_CPU_STEP if possible.
 *
 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
 * as low as the platform limits will allow (we could go lower, but there
 * wouldn't be much point).
 */
static void ips_cpu_lower(struct ips_driver *ips)
{}

/**
 * do_enable_cpu_turbo - internal turbo enable function
 * @data: unused
 *
 * Internal function for actually updating MSRs.  When we enable/disable
 * turbo, we need to do it on each CPU; this function is the one called
 * by on_each_cpu() when needed.
 */
static void do_enable_cpu_turbo(void *data)
{}

/**
 * ips_enable_cpu_turbo - enable turbo mode on all CPUs
 * @ips: IPS driver struct
 *
 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
 * all logical threads.
 */
static void ips_enable_cpu_turbo(struct ips_driver *ips)
{}

/**
 * do_disable_cpu_turbo - internal turbo disable function
 * @data: unused
 *
 * Internal function for actually updating MSRs.  When we enable/disable
 * turbo, we need to do it on each CPU; this function is the one called
 * by on_each_cpu() when needed.
 */
static void do_disable_cpu_turbo(void *data)
{}

/**
 * ips_disable_cpu_turbo - disable turbo mode on all CPUs
 * @ips: IPS driver struct
 *
 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
 * all logical threads.
 */
static void ips_disable_cpu_turbo(struct ips_driver *ips)
{}

/**
 * ips_gpu_busy - is GPU busy?
 * @ips: IPS driver struct
 *
 * Check GPU for load to see whether we should increase its thermal budget.
 * We need to call into the i915 driver in this case.
 *
 * RETURNS:
 * True if the GPU could use more power, false otherwise.
 */
static bool ips_gpu_busy(struct ips_driver *ips)
{}

/**
 * ips_gpu_raise - raise GPU power clamp
 * @ips: IPS driver struct
 *
 * Raise the GPU frequency/power if possible.  We need to call into the
 * i915 driver in this case.
 */
static void ips_gpu_raise(struct ips_driver *ips)
{}

/**
 * ips_gpu_lower - lower GPU power clamp
 * @ips: IPS driver struct
 *
 * Lower GPU frequency/power if possible.  Need to call i915.
 */
static void ips_gpu_lower(struct ips_driver *ips)
{}

/**
 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
 * @ips: IPS driver struct
 *
 * Call into the graphics driver indicating that it can safely use
 * turbo mode.
 */
static void ips_enable_gpu_turbo(struct ips_driver *ips)
{}

/**
 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
 * @ips: IPS driver struct
 *
 * Request that the graphics driver disable turbo mode.
 */
static void ips_disable_gpu_turbo(struct ips_driver *ips)
{}

/**
 * mcp_exceeded - check whether we're outside our thermal & power limits
 * @ips: IPS driver struct
 *
 * Check whether the MCP is over its thermal or power budget.
 *
 * Returns: %true if the temp or power has exceeded its maximum, else %false
 */
static bool mcp_exceeded(struct ips_driver *ips)
{}

/**
 * cpu_exceeded - check whether a CPU core is outside its limits
 * @ips: IPS driver struct
 * @cpu: CPU number to check
 *
 * Check whether a given CPU's average temp or power is over its limit.
 *
 * Returns: %true if the temp or power has exceeded its maximum, else %false
 */
static bool cpu_exceeded(struct ips_driver *ips, int cpu)
{}

/**
 * mch_exceeded - check whether the GPU is over budget
 * @ips: IPS driver struct
 *
 * Check the MCH temp & power against their maximums.
 *
 * Returns: %true if the temp or power has exceeded its maximum, else %false
 */
static bool mch_exceeded(struct ips_driver *ips)
{}

/**
 * verify_limits - verify BIOS provided limits
 * @ips: IPS driver struct
 *
 * BIOS can optionally provide non-default limits for power and temp.  Check
 * them here and use the defaults if the BIOS values are not provided or
 * are otherwise unusable.
 */
static void verify_limits(struct ips_driver *ips)
{}

/**
 * update_turbo_limits - get various limits & settings from regs
 * @ips: IPS driver struct
 *
 * Update the IPS power & temp limits, along with turbo enable flags,
 * based on latest register contents.
 *
 * Used at init time and for runtime BIOS support, which requires polling
 * the regs for updates (as a result of AC->DC transition for example).
 *
 * LOCKING:
 * Caller must hold turbo_status_lock (outside of init)
 */
static void update_turbo_limits(struct ips_driver *ips)
{}

/**
 * ips_adjust - adjust power clamp based on thermal state
 * @data: ips driver structure
 *
 * Wake up every 5s or so and check whether we should adjust the power clamp.
 * Check CPU and GPU load to determine which needs adjustment.  There are
 * several things to consider here:
 *   - do we need to adjust up or down?
 *   - is CPU busy?
 *   - is GPU busy?
 *   - is CPU in turbo?
 *   - is GPU in turbo?
 *   - is CPU or GPU preferred? (CPU is default)
 *
 * So, given the above, we do the following:
 *   - up (TDP available)
 *     - CPU not busy, GPU not busy - nothing
 *     - CPU busy, GPU not busy - adjust CPU up
 *     - CPU not busy, GPU busy - adjust GPU up
 *     - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
 *       non-preferred unit if necessary
 *   - down (at TDP limit)
 *     - adjust both CPU and GPU down if possible
 *
 *              |cpu+ gpu+      cpu+gpu-        cpu-gpu+        cpu-gpu-
 * cpu < gpu <  |cpu+gpu+       cpu+            gpu+            nothing
 * cpu < gpu >= |cpu+gpu-(mcp<) cpu+gpu-(mcp<)  gpu-            gpu-
 * cpu >= gpu < |cpu-gpu+(mcp<) cpu-            cpu-gpu+(mcp<)  cpu-
 * cpu >= gpu >=|cpu-gpu-       cpu-gpu-        cpu-gpu-        cpu-gpu-
 *
 * Returns: %0
 */
static int ips_adjust(void *data)
{}

/*
 * Helpers for reading out temp/power values and calculating their
 * averages for the decision making and monitoring functions.
 */

static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
{}

static u16 read_mgtv(struct ips_driver *ips)
{}

static u16 read_ptv(struct ips_driver *ips)
{}

static u16 read_ctv(struct ips_driver *ips, int cpu)
{}

static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
{}

static const u16 temp_decay_factor =;
static u16 update_average_temp(u16 avg, u16 val)
{}

static const u16 power_decay_factor =;
static u16 update_average_power(u32 avg, u32 val)
{}

static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
{}

static void monitor_timeout(struct timer_list *t)
{}

/**
 * ips_monitor - temp/power monitoring thread
 * @data: ips driver structure
 *
 * This is the main function for the IPS driver.  It monitors power and
 * temperature in the MCP and adjusts CPU and GPU power clamps accordingly.
 *
 * We keep a 5s moving average of power consumption and temperature.  Using
 * that data, along with CPU vs GPU preference, we adjust the power clamps
 * up or down.
 *
 * Returns: %0 on success or -errno on error
 */
static int ips_monitor(void *data)
{}

/**
 * ips_irq_handler - handle temperature triggers and other IPS events
 * @irq: irq number
 * @arg: unused
 *
 * Handle temperature limit trigger events, generally by lowering the clamps.
 * If we're at a critical limit, we clamp back to the lowest possible value
 * to prevent emergency shutdown.
 *
 * Returns: IRQ_NONE or IRQ_HANDLED
 */
static irqreturn_t ips_irq_handler(int irq, void *arg)
{}

#ifndef CONFIG_DEBUG_FS
/* No-op stub used when CONFIG_DEBUG_FS is not defined. */
static void ips_debugfs_init(struct ips_driver *ips)
{
}
/* No-op stub used when CONFIG_DEBUG_FS is not defined. */
static void ips_debugfs_cleanup(struct ips_driver *ips)
{
}
#else

/* Expose current state and limits in debugfs if possible */

static int cpu_temp_show(struct seq_file *m, void *data)
{}
DEFINE_SHOW_ATTRIBUTE();

static int cpu_power_show(struct seq_file *m, void *data)
{}
DEFINE_SHOW_ATTRIBUTE();

static int cpu_clamp_show(struct seq_file *m, void *data)
{}
DEFINE_SHOW_ATTRIBUTE();

static int mch_temp_show(struct seq_file *m, void *data)
{}
DEFINE_SHOW_ATTRIBUTE();

static int mch_power_show(struct seq_file *m, void *data)
{}
DEFINE_SHOW_ATTRIBUTE();

static void ips_debugfs_cleanup(struct ips_driver *ips)
{}

static void ips_debugfs_init(struct ips_driver *ips)
{}
#endif /* CONFIG_DEBUG_FS */

/**
 * ips_detect_cpu - detect whether CPU supports IPS
 * @ips: IPS driver struct
 *
 * Walk our list and see if we're on a supported CPU.  If we find one,
 * return the limits for it.
 *
 * Returns: the &ips_mcp_limits struct that matches the boot CPU or %NULL
 */
static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
{}

/**
 * ips_get_i915_syms - try to get GPU control methods from i915 driver
 * @ips: IPS driver struct
 *
 * The i915 driver exports several interfaces to allow the IPS driver to
 * monitor and control graphics turbo mode.  If we can find them, we can
 * enable graphics turbo, otherwise we must disable it to avoid exceeding
 * thermal and power limits in the MCP.
 *
 * Returns: %true if the required symbols are found, else %false
 */
static bool ips_get_i915_syms(struct ips_driver *ips)
{}

static bool
ips_gpu_turbo_enabled(struct ips_driver *ips)
{}

void
ips_link_to_i915_driver(void)
{}
EXPORT_SYMBOL_GPL();

static const struct pci_device_id ips_id_table[] =;

MODULE_DEVICE_TABLE(pci, ips_id_table);

static int ips_blacklist_callback(const struct dmi_system_id *id)
{}

static const struct dmi_system_id ips_blacklist[] =;

static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
{}

static void ips_remove(struct pci_dev *dev)
{}

static struct pci_driver ips_pci_driver =;

module_pci_driver();

MODULE_LICENSE();
MODULE_AUTHOR();
MODULE_DESCRIPTION();