linux/arch/x86/kvm/vmx/vmx.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 */
#define pr_fmt(fmt)

#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/mm.h>
#include <linux/objtool.h>
#include <linux/sched.h>
#include <linux/sched/smt.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/trace_events.h>
#include <linux/entry-kvm.h>

#include <asm/apic.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
#include <asm/fred.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
#include <asm/reboot.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>

#include <trace/events/ipi.h>

#include "capabilities.h"
#include "cpuid.h"
#include "hyperv.h"
#include "kvm_onhyperv.h"
#include "irq.h"
#include "kvm_cache_regs.h"
#include "lapic.h"
#include "mmu.h"
#include "nested.h"
#include "pmu.h"
#include "sgx.h"
#include "trace.h"
#include "vmcs.h"
#include "vmcs12.h"
#include "vmx.h"
#include "x86.h"
#include "x86_ops.h"
#include "smm.h"
#include "vmx_onhyperv.h"
#include "posted_intr.h"

MODULE_AUTHOR();
MODULE_DESCRIPTION();
MODULE_LICENSE();

#ifdef MODULE
static const struct x86_cpu_id vmx_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_VMX, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
#endif

bool __read_mostly enable_vpid =;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly enable_vnmi =;
module_param_named(vnmi, enable_vnmi, bool, 0444);

bool __read_mostly flexpriority_enabled =;
module_param_named(flexpriority, flexpriority_enabled, bool, 0444);

bool __read_mostly enable_ept =;
module_param_named(ept, enable_ept, bool, 0444);

bool __read_mostly enable_unrestricted_guest =;
module_param_named(unrestricted_guest,
			enable_unrestricted_guest, bool, 0444);

bool __read_mostly enable_ept_ad_bits =;
module_param_named(eptad, enable_ept_ad_bits, bool, 0444);

static bool __read_mostly emulate_invalid_guest_state =;
module_param(emulate_invalid_guest_state, bool, 0444);

static bool __read_mostly fasteoi =;
module_param(fasteoi, bool, 0444);

module_param(enable_apicv, bool, 0444);

bool __read_mostly enable_ipiv =;
module_param(enable_ipiv, bool, 0444);

/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and act as hypervisors for their own guests. If nested=0, guests may not
 * use VMX instructions.
 */
static bool __read_mostly nested =;
module_param(nested, bool, 0444);

bool __read_mostly enable_pml =;
module_param_named(pml, enable_pml, bool, 0444);

static bool __read_mostly error_on_inconsistent_vmcs_config =;
module_param(error_on_inconsistent_vmcs_config, bool, 0444);

static bool __read_mostly dump_invalid_vmcs =;
module_param(dump_invalid_vmcs, bool, 0644);

#define MSR_BITMAP_MODE_X2APIC
#define MSR_BITMAP_MODE_X2APIC_APICV

#define KVM_VMX_TSC_MULTIPLIER_MAX

/* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
static int __read_mostly cpu_preemption_timer_multi;
static bool __read_mostly enable_preemption_timer =;
#ifdef CONFIG_X86_64
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif

extern bool __read_mostly allow_smaller_maxphyaddr;
module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);

#define KVM_VM_CR0_ALWAYS_OFF
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST
#define KVM_VM_CR0_ALWAYS_ON

#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST
#define KVM_PMODE_VM_CR4_ALWAYS_ON
#define KVM_RMODE_VM_CR4_ALWAYS_ON

#define RMODE_GUEST_OWNED_EFLAGS_BITS

#define MSR_IA32_RTIT_STATUS_MASK

/*
 * List of MSRs that can be directly passed to the guest.
 * In addition to these, x2APIC, PT and LBR MSRs are handled specially.
 */
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] =;

/*
 * These two parameters are used to configure the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled. Testing shows this time is usually smaller than 128
 *             cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop. Tests indicate that most spinlocks are held for
 *             less than 2^12 cycles.
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer to SDM volume 3B, sections 21.6.13 & 22.1.3.
 * An illustrative sketch of how the per-vCPU window is grown and shrunk
 * follows the parameter definitions below.
 */
static unsigned int ple_gap =;
module_param(ple_gap, uint, 0444);

static unsigned int ple_window =;
module_param(ple_window, uint, 0444);

/* Default doubles per-vcpu window every exit. */
static unsigned int ple_window_grow =;
module_param(ple_window_grow, uint, 0444);

/* Default resets per-vcpu window every exit to ple_window. */
static unsigned int ple_window_shrink =;
module_param(ple_window_shrink, uint, 0444);

/* Default is to compute the maximum so we can never overflow. */
static unsigned int ple_window_max        =;
module_param(ple_window_max, uint, 0444);
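
/*
 * Illustrative sketch (not the in-tree helpers) of how the per-vCPU PLE
 * window is typically adjusted with the parameters above: grow it after a
 * PAUSE-loop exit, shrink it back toward ple_window when the vCPU behaves,
 * and clamp growth at ple_window_max.  The example_* names are used here
 * purely for illustration.
 */
static unsigned int example_grow_ple_window(unsigned int old)
{
	u64 new = old;

	if (ple_window_grow < 1)
		return ple_window;

	if (ple_window_grow < ple_window)
		new *= ple_window_grow;			/* multiplicative growth */
	else
		new += ple_window_grow;			/* additive growth */

	return min(new, (u64)ple_window_max);
}

static unsigned int example_shrink_ple_window(unsigned int old)
{
	unsigned int new;

	if (ple_window_shrink < 1)
		return ple_window;			/* reset to the default */

	if (ple_window_shrink < ple_window)
		new = old / ple_window_shrink;		/* divisive shrink */
	else if (old > ple_window_shrink)
		new = old - ple_window_shrink;		/* subtractive shrink */
	else
		new = ple_window;

	return max(new, ple_window);
}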

/* Default is SYSTEM mode, 1 for host-guest mode */
int __read_mostly pt_mode =;
module_param(pt_mode, int, S_IRUGO);

struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;

static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);

/* Storage for pre module init parameter parsing */
static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param =;

static const struct {} vmentry_l1d_param[] =;

#define L1D_CACHE_ORDER
static void *vmx_l1d_flush_pages;

static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{}

static int vmentry_l1d_flush_parse(const char *s)
{}

static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
{}

static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{}

static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
{}

static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
{}

static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{}

static const struct kernel_param_ops vmentry_l1d_flush_ops =;
module_param_cb();

static u32 vmx_segment_access_rights(struct kvm_segment *var);

void vmx_vmexit(void);

#define vmx_insn_failed(fmt...)

noinline void vmread_error(unsigned long field)
{}

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
{
	if (fault) {
		kvm_spurious_fault();
	} else {
		instrumentation_begin();
		vmread_error(field);
		instrumentation_end();
	}
}
#endif

noinline void vmwrite_error(unsigned long field, unsigned long value)
{}

noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
{}

noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
{}

noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
{}

noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
{}

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
 * We maintain a per-CPU linked list of the VMCSs loaded on that CPU. This is
 * needed when a CPU is brought down and we need to VMCLEAR all VMCSs loaded on it.
 */
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);

static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);

struct vmcs_config vmcs_config __ro_after_init;
struct vmx_capability vmx_capability __ro_after_init;

#define VMX_SEGMENT_FIELD(seg)

static const struct kvm_vmx_segment_field {} kvm_vmx_segment_fields[] =;

static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{}

static unsigned long host_idt_base;

#if IS_ENABLED(CONFIG_HYPERV)
static bool __read_mostly enlightened_vmcs =;
module_param(enlightened_vmcs, bool, 0444);

static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
{}

static __init void hv_init_evmcs(void)
{}

static void hv_reset_evmcs(void)
{}

#else /* IS_ENABLED(CONFIG_HYPERV) */
static void hv_init_evmcs(void) {}
static void hv_reset_evmcs(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

/*
 * Each entry's format: document - errata name - stepping - processor name.
 * Taken from
 * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
 */
static u32 vmx_preemption_cpu_tfms[] =;

static inline bool cpu_has_broken_vmx_preemption_timer(void)
{}
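
/*
 * Illustrative sketch of the check above (the in-tree body is elided here):
 * compare the boot CPU's family/model/stepping signature, with the reserved
 * CPUID.1:EAX bits cleared, against the errata list.  The example_* name is
 * for illustration only.
 */
static inline bool example_has_broken_vmx_preemption_timer(void)
{
	u32 eax = cpuid_eax(0x00000001), i;

	/* Clear the reserved bits so only family/model/stepping remain. */
	eax &= ~(0x3U << 14 | 0xfU << 28);

	for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
		if (eax == vmx_preemption_cpu_tfms[i])
			return true;

	return false;
}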

static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{}

static int vmx_get_passthrough_msr_slot(u32 msr)
{}

struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{}

static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
				  struct vmx_uret_msr *msr, u64 data)
{}

/*
 * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
 *
 * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
 * atomically track post-VMXON state, e.g. this may be called in NMI context.
 * Eat all faults, as any other VMXOFF fault is mode related, i.e. faults are
 * guaranteed to be due to the !post-VMXON check unless the CPU is magically
 * in RM, VM86, compat mode, or at CPL>0.  A minimal sketch of this approach
 * follows the function below.
 */
static int kvm_cpu_vmxoff(void)
{}
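
/*
 * Minimal sketch of the fault-eating approach described above, assuming the
 * standard exception-table fixup: execute VMXOFF with a fixup label so that a
 * #UD from a !post-VMXON CPU is swallowed, then clear CR4.VMXE either way.
 * The example_* name is for illustration only.
 */
static int example_cpu_vmxoff(void)
{
	asm goto("1: vmxoff\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "cc", "memory" : fault);

	cr4_clear_bits(X86_CR4_VMXE);
	return 0;

fault:
	cr4_clear_bits(X86_CR4_VMXE);
	return -EIO;
}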

static void vmx_emergency_disable(void)
{}

static void __loaded_vmcs_clear(void *arg)
{}

void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
{}

static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
				       unsigned field)
{}

static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
{}

static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
{}

static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
{}

static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
{}

void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
{}

/*
 * Check whether writes to the given MSR are intercepted in the currently
 * loaded MSR bitmap (see the illustrative sketch below).
 */
static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{}
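
/*
 * Illustrative sketch of the check above (assumed to mirror the elided body):
 * if MSR bitmaps aren't in use, every MSR access is intercepted; otherwise
 * consult the write half of the currently loaded bitmap.
 */
static bool example_msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{
	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
		return true;

	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
}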

unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
{}

static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
		unsigned long entry, unsigned long exit)
{}

int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
{}

static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
{}

static __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
		unsigned long entry, unsigned long exit,
		unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
		u64 guest_val, u64 host_val)
{}

static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
				  u64 guest_val, u64 host_val, bool entry_only)
{}

static bool update_transition_efer(struct vcpu_vmx *vmx)
{}

#ifdef CONFIG_X86_32
/*
 * On 32-bit kernels, VM exits still load the FS and GS bases from the
 * VMCS rather than the segment table.  KVM uses this helper to figure
 * out the current bases to poke them into the VMCS before entry.
 */
static unsigned long segment_base(u16 selector)
{
	struct desc_struct *table;
	unsigned long v;

	/* A NULL selector has no base. */
	if (!(selector & ~SEGMENT_RPL_MASK))
		return 0;

	table = get_current_gdt_ro();

	if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
		u16 ldt_selector = kvm_read_ldt();

		if (!(ldt_selector & ~SEGMENT_RPL_MASK))
			return 0;

		/* The LDT is itself described by a GDT entry; recurse to get its base. */
		table = (struct desc_struct *)segment_base(ldt_selector);
	}
	v = get_desc_base(&table[selector >> 3]);
	return v;
}
#endif

static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
{}

static inline bool pt_output_base_valid(struct kvm_vcpu *vcpu, u64 base)
{}

static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
{}

static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
{}

static void pt_guest_enter(struct vcpu_vmx *vmx)
{}

static void pt_guest_exit(struct vcpu_vmx *vmx)
{}

void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
			unsigned long fs_base, unsigned long gs_base)
{}

void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{}

static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{}

#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{}

static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{}
#endif

static void grow_ple_window(struct kvm_vcpu *vcpu)
{}

static void shrink_ple_window(struct kvm_vcpu *vcpu)
{}

void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
			struct loaded_vmcs *buddy)
{}

/*
 * Switches to the specified vcpu until a matching vcpu_put(); assumes the
 * vcpu mutex is already held.
 */
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{}

void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{}

bool vmx_emulation_required(struct kvm_vcpu *vcpu)
{}

unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
{}

void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{}

bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
{}

u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{}

void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{}

static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
{}

int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
				  void *insn, int insn_len)
{}

static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{}

/*
 * Recognizes a pending MTF VM-exit and records the nested state for later
 * delivery.
 */
void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
{}

int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{}

static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{}

void vmx_inject_exception(struct kvm_vcpu *vcpu)
{}

static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
			       bool load_into_hardware)
{}

/*
 * Configure user return MSRs to automatically save, load, and restore MSRs
 * that need to be shoved into hardware when running the guest.  Note, omitting
 * an MSR here does _NOT_ mean it's not emulated, only that it will not be
 * loaded into hardware when running the guest.
 */
static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
{}

u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
{}

u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
{}

void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
{}

void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{}

/*
 * Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of
 * guest CPUID.  Note, KVM allows userspace to set "VMX in SMX" to maintain
 * backwards compatibility even though KVM doesn't support emulating SMX.  And
 * because userspace can set "VMX in SMX", the guest must also be allowed to set it,
 * e.g. if the MSR is left unlocked and the guest does a RMW operation.
 */
#define KVM_SUPPORTED_FEATURE_CONTROL

static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
						    struct msr_data *msr)
{}

int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{}

/*
 * Reads an MSR value (of 'msr_info->index') into 'msr_info->data'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{}

static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
						    u64 data)
{}

static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{}

/*
 * Writes the MSR value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{}

void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{}

/*
 * There is no X86_FEATURE for SGX yet, so query CPUID directly instead of
 * going through cpu_has(), to ensure KVM is trapping
 * ENCLS whenever it's supported in hardware.  It does not matter whether
 * the host OS supports or has enabled SGX.
 */
static bool cpu_has_sgx(void)
{}

/*
 * Some CPUs support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL, but those controls
 * can't be used due to an erratum where a VM-exit may incorrectly clear
 * IA32_PERF_GLOBAL_CTRL[34:32].  Work around the erratum by using the
 * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL (see the sketch below).
 */
static bool cpu_has_perf_global_ctrl_bug(void)
{}
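
/*
 * Illustrative sketch of the workaround mentioned above: instead of the buggy
 * VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL controls, queue the MSR on the
 * VM-entry/VM-exit autoload lists via add_atomic_switch_msr().  The example_*
 * name is for illustration only.
 */
static void example_switch_perf_global_ctrl(struct vcpu_vmx *vmx,
					    u64 guest_val, u64 host_val)
{
	add_atomic_switch_msr(vmx, MSR_CORE_PERF_GLOBAL_CTRL,
			      guest_val, host_val, false);
}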

static int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result)
{}

static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
{}

static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
			     struct vmx_capability *vmx_cap)
{}

static bool __kvm_is_vmx_supported(void)
{}

static bool kvm_is_vmx_supported(void)
{}

int vmx_check_processor_compat(void)
{}

static int kvm_cpu_vmxon(u64 vmxon_pointer)
{}

int vmx_hardware_enable(void)
{}

static void vmclear_local_loaded_vmcss(void)
{}

void vmx_hardware_disable(void)
{}

struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
{}

void free_vmcs(struct vmcs *vmcs)
{}

/*
 * Free a VMCS, but before that, VMCLEAR it on the CPU where it was last loaded.
 */
void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
{}

int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
{}

static void free_kvm_area(void)
{}

static __init int alloc_kvm_area(void)
{}

static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
		struct kvm_segment *save)
{}

static void enter_pmode(struct kvm_vcpu *vcpu)
{}

static void fix_rmode_seg(int seg, struct kvm_segment *save)
{}

static void enter_rmode(struct kvm_vcpu *vcpu)
{}

int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{}

#ifdef CONFIG_X86_64

static void enter_lmode(struct kvm_vcpu *vcpu)
{}

static void exit_lmode(struct kvm_vcpu *vcpu)
{}

#endif

void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
{}

static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{}

void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
{}

void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
{}

void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
{}

void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
{}

void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{}

#define CR3_EXITING_BITS

bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{}

void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{}

static int vmx_get_max_ept_level(void)
{}

u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
{}

void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level)
{}

bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{}

void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{}

void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
{}

u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{}

int vmx_get_cpl(struct kvm_vcpu *vcpu)
{}

static u32 vmx_segment_access_rights(struct kvm_segment *var)
{}

void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
{}

void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
{}

void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
{}

void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{}

void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{}

void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{}

void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{}

static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
{}

static bool code_segment_valid(struct kvm_vcpu *vcpu)
{}

static bool stack_segment_valid(struct kvm_vcpu *vcpu)
{}

static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
{}

static bool tr_valid(struct kvm_vcpu *vcpu)
{}

static bool ldtr_valid(struct kvm_vcpu *vcpu)
{}

static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
{}

/*
 * Check if the guest state is valid.  Returns true if valid, false if not.
 * We assume that registers are always usable.
 */
bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
{}

static int init_rmode_tss(struct kvm *kvm, void __user *ua)
{}

static int init_rmode_identity_map(struct kvm *kvm)
{}

static void seg_setup(int seg)
{}

int allocate_vpid(void)
{}

void free_vpid(int vpid)
{}

static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
{}

void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{}

void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{}

static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu)
{}

void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
{}

void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
{}

static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
						     int pi_vec)
{}

static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
						int vector)
{}

/*
 * Send an interrupt to a vcpu via the posted-interrupt mechanism:
 * 1. If the target vcpu is running (non-root mode), send a posted-interrupt
 *    notification and hardware will sync the PIR to the vIRR atomically.
 * 2. If the target vcpu isn't running (root mode), kick it so it picks up the
 *    interrupt from the PIR on the next VM-entry.
 * A minimal sketch of this flow follows the function below.
 */
static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{}
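
/*
 * Minimal sketch of the delivery flow described above, assuming the pi_desc
 * helpers from posted_intr.h and the pi_desc embedded in struct vcpu_vmx.
 * The example_* name is for illustration only.
 */
static int example_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
{
	struct pi_desc *pi_desc = &to_vmx(vcpu)->pi_desc;

	/* Record the vector in the PIR; nothing to do if it was already set. */
	if (pi_test_and_set_pir(vector, pi_desc))
		return 0;

	/* If ON was already set, a notification event is already in flight. */
	if (pi_test_and_set_on(pi_desc))
		return 0;

	/*
	 * A running (non-root) vCPU receives the notification vector and
	 * hardware syncs PIR->vIRR; otherwise the vCPU is kicked and picks the
	 * interrupt up from the PIR on its next VM-entry.
	 */
	kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
	return 0;
}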

void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
			   int trig_mode, int vector)
{}

/*
 * Set up the vmcs's constant host-state fields, i.e., host-state fields that
 * will not change in the lifetime of the guest.
 * Note that host-state that does change is set elsewhere. E.g., host-state
 * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
 */
void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{}

void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
{}

static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{}

static u32 vmx_vmentry_ctrl(void)
{}

static u32 vmx_vmexit_ctrl(void)
{}

void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{}

static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{}

static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
{}

/*
 * Adjust a single secondary execution control bit to intercept/allow an
 * instruction in the guest.  This is usually done based on whether or not a
 * feature has been exposed to the guest in order to correctly emulate faults.
 */
static inline void
vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
				  u32 control, bool enabled, bool exiting)
{}

/*
 * Wrapper macro for the common case of adjusting a secondary execution control
 * based on a single guest CPUID bit, with a dedicated feature bit.  This also
 * verifies that the control is actually supported by KVM and hardware.
 */
#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)

/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
#define vmx_adjust_sec_exec_feature(vmx, exec_control, lname, uname)

#define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname)

static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
{}

static inline int vmx_get_pid_table_order(struct kvm *kvm)
{}

static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
{}

int vmx_vcpu_precreate(struct kvm *kvm)
{}

#define VMX_XSS_EXIT_BITMAP

static void init_vmcs(struct vcpu_vmx *vmx)
{}

static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{}

void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{}

void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
{}

void vmx_enable_nmi_window(struct kvm_vcpu *vcpu)
{}

void vmx_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{}

void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{}

bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{}

void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{}

bool vmx_nmi_blocked(struct kvm_vcpu *vcpu)
{}

int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{}

bool __vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
{}

bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu)
{}

int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{}

int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{}

int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
{}

static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{}

static int handle_rmode_exception(struct kvm_vcpu *vcpu,
				  int vec, u32 err_code)
{}

static int handle_machine_check(struct kvm_vcpu *vcpu)
{}

/*
 * If the host has split lock detection disabled, then #AC is
 * unconditionally injected into the guest, which matches the behaviour prior
 * to split lock detection.
 *
 * If the host has split lock detection enabled, then #AC is
 * only injected into the guest when:
 *  - Guest CPL == 3 (user mode)
 *  - Guest has #AC detection enabled in CR0
 *  - Guest EFLAGS has the AC bit set
 * A minimal sketch of these checks follows the function below.
 */
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
{}
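
/*
 * Minimal sketch of the policy above (assumed to mirror the elided body),
 * using the kvm_is_cr0_bit_set()/kvm_get_rflags() accessors: without split
 * lock detection on the host, always inject; with it, only inject when the
 * guest itself armed legacy #AC alignment checks.
 */
static bool example_guest_inject_ac(struct kvm_vcpu *vcpu)
{
	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
		return true;

	return vmx_get_cpl(vcpu) == 3 &&
	       kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}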

static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{}

static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
{}

static int handle_triple_fault(struct kvm_vcpu *vcpu)
{}

static int handle_io(struct kvm_vcpu *vcpu)
{}

void vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{}

/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{}

static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
{}

static int handle_desc(struct kvm_vcpu *vcpu)
{}

static int handle_cr(struct kvm_vcpu *vcpu)
{}

static int handle_dr(struct kvm_vcpu *vcpu)
{}

void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{}

void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
{}

static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{}

static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{}

static int handle_invlpg(struct kvm_vcpu *vcpu)
{}

static int handle_apic_access(struct kvm_vcpu *vcpu)
{}

static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
{}

static int handle_apic_write(struct kvm_vcpu *vcpu)
{}

static int handle_task_switch(struct kvm_vcpu *vcpu)
{}

static int handle_ept_violation(struct kvm_vcpu *vcpu)
{}

static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{}

static int handle_nmi_window(struct kvm_vcpu *vcpu)
{}

static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
{}

static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{}

int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{}

/*
 * Indicates a vcpu busy-waiting on a spinlock.  Plain PAUSE exiting is not
 * enabled, so this handler is only reached on CPUs with PAUSE-Loop-Exiting.
 */
static int handle_pause(struct kvm_vcpu *vcpu)
{}

static int handle_monitor_trap(struct kvm_vcpu *vcpu)
{}

static int handle_invpcid(struct kvm_vcpu *vcpu)
{}

static int handle_pml_full(struct kvm_vcpu *vcpu)
{}

static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
						   bool force_immediate_exit)
{}

static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{}

/*
 * When nested=0, all VMX instruction VM Exits filter here.  The handlers
 * are overwritten by nested_vmx_setup() when nested=1.
 */
static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
{}

#ifndef CONFIG_X86_SGX_KVM
static int handle_encls(struct kvm_vcpu *vcpu)
{
	/*
	 * SGX virtualization is disabled.  There is no software enable bit for
	 * SGX, so KVM intercepts all ENCLS leafs and injects a #UD to prevent
	 * the guest from executing ENCLS (when SGX is supported by hardware).
	 */
	kvm_queue_exception(vcpu, UD_VECTOR);
	return 1;
}
#endif /* CONFIG_X86_SGX_KVM */

static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
{}

static int handle_notify(struct kvm_vcpu *vcpu)
{}

/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) =;

static const int kvm_vmx_max_exit_handlers =;

void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
		       u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code)
{}

static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{}

static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
{}

static void vmx_dump_sel(char *name, uint32_t sel)
{}

static void vmx_dump_dtsel(char *name, uint32_t limit)
{}

static void vmx_dump_msrs(char *name, struct vmx_msrs *m)
{}

void dump_vmcs(struct kvm_vcpu *vcpu)
{}

/*
 * The guest has exited.  See if we can fix it or if we need userspace
 * assistance.
 */
static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{}

int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{}

/*
 * Software-based L1D cache flush, used when microcode providing
 * the cache control MSR is not loaded.
 *
 * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
 * flushing it requires reading in 64 KiB because the replacement algorithm
 * is not exactly LRU.  This could be sized at runtime via topology
 * information, but as all relevant affected CPUs have a 32 KiB L1D cache
 * there is no point in doing so.  An illustrative sketch of the flush loop
 * follows the function below.
 */
static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
{}
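
/*
 * Illustrative sketch of the software fallback described above, assuming a
 * 64-byte cache line: walk the over-sized buffer once to populate the TLB,
 * then read one byte per cache line to displace the previous L1D contents.
 * The in-tree flush loop is written in asm; this C version and its example_*
 * name are for illustration only.
 */
static void example_sw_l1d_flush(void *flush_pages)
{
	int size = PAGE_SIZE << L1D_CACHE_ORDER;
	int i;

	/* Populate the TLB so the flush loop itself doesn't take faults. */
	for (i = 0; i < size; i += PAGE_SIZE)
		(void)*((volatile char *)flush_pages + i);

	/* Read one byte per cache line to evict the previous L1D contents. */
	for (i = 0; i < size; i += 64)
		(void)*((volatile char *)flush_pages + i);
}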

void vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{}

void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{}

void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
{}

void vmx_hwapic_isr_update(int max_isr)
{}

static void vmx_set_rvi(int vector)
{}

void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{}

int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{}

void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{}

void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
{}

void vmx_do_interrupt_irqoff(unsigned long entry);
void vmx_do_nmi_irqoff(void);

static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{}

static void handle_exception_irqoff(struct kvm_vcpu *vcpu, u32 intr_info)
{}

static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
					     u32 intr_info)
{}

void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{}

/*
 * The kvm parameter can be NULL (module initialization, or invocation before
 * VM creation). Be sure to check the kvm parameter before using it.
 */
bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
{}

static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{}

static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
				      u32 idt_vectoring_info,
				      int instr_len_field,
				      int error_code_field)
{}

static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{}

void vmx_cancel_injection(struct kvm_vcpu *vcpu)
{}

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{}

static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{}

void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
{}

void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
					unsigned int flags)
{}

static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
					     bool force_immediate_exit)
{}

static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					unsigned int flags)
{}

fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{}

void vmx_vcpu_free(struct kvm_vcpu *vcpu)
{}

int vmx_vcpu_create(struct kvm_vcpu *vcpu)
{}

#define L1TF_MSG_SMT
#define L1TF_MSG_L1D

int vmx_vm_init(struct kvm *kvm)
{}

u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{}

static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
{}

/*
 * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
 * (indicating "allowed-1") if they are supported in the guest's CPUID.
 */
static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
{}
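
/*
 * Illustrative sketch of the FIXED1 update described above (field names taken
 * from the nested MSR state; the example_* name and the feature subset shown
 * are for illustration only): start from the unconditional baseline and set
 * additional CR4 "allowed-1" bits only for features exposed in guest CPUID.
 */
static void example_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
{
	struct nested_vmx_msrs *msrs = &to_vmx(vcpu)->nested.msrs;

	msrs->cr0_fixed1 = 0xffffffff;
	msrs->cr4_fixed1 = X86_CR4_PCE;

	if (guest_cpuid_has(vcpu, X86_FEATURE_SMEP))
		msrs->cr4_fixed1 |= X86_CR4_SMEP;
	if (guest_cpuid_has(vcpu, X86_FEATURE_SMAP))
		msrs->cr4_fixed1 |= X86_CR4_SMAP;
	if (guest_cpuid_has(vcpu, X86_FEATURE_PKU))
		msrs->cr4_fixed1 |= X86_CR4_PKE;
}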

static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{}

void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{}

static __init u64 vmx_get_perf_capabilities(void)
{}

static __init void vmx_set_cpu_caps(void)
{}

static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
				  struct x86_instruction_info *info)
{}

int vmx_check_intercept(struct kvm_vcpu *vcpu,
			struct x86_instruction_info *info,
			enum x86_intercept_stage stage,
			struct x86_exception *exception)
{}

#ifdef CONFIG_X86_64
/*
 * Compute (a << shift) / divisor; returns 1 on overflow, otherwise 0.
 * A portable sketch follows the helper below.
 */
static inline int u64_shl_div_u64(u64 a, unsigned int shift,
				  u64 divisor, u64 *result)
{}
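
/*
 * Portable sketch of the helper above using 128-bit arithmetic (the elided
 * in-tree body uses divq directly); the example_* name is for illustration.
 */
static inline int example_u64_shl_div_u64(u64 a, unsigned int shift,
					  u64 divisor, u64 *result)
{
	unsigned __int128 n = (unsigned __int128)a << shift;

	if (!divisor || n / divisor > U64_MAX)
		return 1;	/* quotient would overflow 64 bits */

	*result = (u64)(n / divisor);
	return 0;
}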

int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
		     bool *expired)
{}

void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{}
#endif

void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{}

void vmx_setup_mce(struct kvm_vcpu *vcpu)
{}

#ifdef CONFIG_KVM_SMM
int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{}

int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
{}

int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
{}

void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
{}
#endif

bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
{}

void vmx_migrate_timers(struct kvm_vcpu *vcpu)
{}

void vmx_hardware_unsetup(void)
{}

void vmx_vm_destroy(struct kvm *kvm)
{}

/*
 * Note, the SDM states that the linear address is masked *after* the modified
 * canonicality check, whereas KVM masks (untags) the address and then performs
 * a "normal" canonicality check.  Functionally, the two methods are identical,
 * and when the masking occurs relative to the canonicality check isn't visible
 * to software, i.e. KVM's behavior doesn't violate the SDM.
 */
gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags)
{}
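
/*
 * Illustrative sketch of the masking step described above, assuming a
 * LAM-style tag layout where the metadata bits sit between bit 62 and the
 * last address bit (e.g. bit 56 for LAM57): sign-extend from the last address
 * bit so the metadata is replaced before the canonical check, while bit 63 is
 * preserved so a user access can't be turned into a supervisor access.
 */
static gva_t example_untag_addr(gva_t gva, int last_addr_bit)
{
	return (sign_extend64(gva, last_addr_bit) & ~BIT_ULL(63)) |
	       (gva & BIT_ULL(63));
}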

static unsigned int vmx_handle_intel_pt_intr(void)
{}

static __init void vmx_setup_user_return_msrs(void)
{}

static void __init vmx_setup_me_spte_mask(void)
{}

__init int vmx_hardware_setup(void)
{}

static void vmx_cleanup_l1d_flush(void)
{}

static void __vmx_exit(void)
{}

static void vmx_exit(void)
{}
module_exit(vmx_exit);

static int __init vmx_init(void)
{}
module_init(vmx_init);