//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// /// Provide a pass which mitigates speculative execution attacks which operate /// by speculating incorrectly past some predicate (a type check, bounds check, /// or other condition) to reach a load with invalid inputs and leak the data /// accessed by that load using a side channel out of the speculative domain. /// /// For details on the attacks, see the first variant in both the Project Zero /// writeup and the Spectre paper: /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html /// https://spectreattack.com/spectre.pdf /// //===----------------------------------------------------------------------===// #include "X86.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include 
"llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <iterator> #include <optional> #include <utility> using namespace llvm; #define PASS_KEY … #define DEBUG_TYPE … STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced"); STATISTIC(NumBranchesUntraced, "Number of branches unable to trace"); STATISTIC(NumAddrRegsHardened, "Number of address mode used registers hardaned"); STATISTIC(NumPostLoadRegsHardened, "Number of post-load register values hardened"); STATISTIC(NumCallsOrJumpsHardened, "Number of calls or jumps requiring extra hardening"); STATISTIC(NumInstsInserted, "Number of instructions inserted"); STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted"); static cl::opt<bool> EnableSpeculativeLoadHardening( "x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden); static cl::opt<bool> HardenEdgesWithLFENCE( PASS_KEY "-lfence", cl::desc( "Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden); static cl::opt<bool> EnablePostLoadHardening( PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. 
This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden); static cl::opt<bool> FenceCallAndRet( PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden); static cl::opt<bool> HardenInterprocedurally( PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden); static cl::opt<bool> HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden); static cl::opt<bool> HardenIndirectCallsAndJumps( PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden); namespace { class X86SpeculativeLoadHardeningPass : public MachineFunctionPass { … }; } // end anonymous namespace char X86SpeculativeLoadHardeningPass::ID = …; void X86SpeculativeLoadHardeningPass::getAnalysisUsage( AnalysisUsage &AU) const { … } static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII) { … } /// Removing duplicate PHI operands to leave the PHI in a canonical and /// predictable form. /// /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR /// isn't what you might expect. We may have multiple entries in PHI nodes for /// a single predecessor. This makes CFG-updating extremely complex, so here we /// simplify all PHI nodes to a model even simpler than the IR's model: exactly /// one entry per predecessor, regardless of how many edges there are. 
static void canonicalizePHIOperands(MachineFunction &MF) { … } /// Helper to scan a function for loads vulnerable to misspeculation that we /// want to harden. /// /// We use this to avoid making changes to functions where there is nothing we /// need to do to harden against misspeculation. static bool hasVulnerableLoad(MachineFunction &MF) { … } bool X86SpeculativeLoadHardeningPass::runOnMachineFunction( MachineFunction &MF) { … } /// Implements the naive hardening approach of putting an LFENCE after every /// potentially mis-predicted control flow construct. /// /// We include this as an alternative mostly for the purpose of comparison. The /// performance impact of this is expected to be extremely severe and not /// practical for any real-world users. void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE( MachineFunction &MF) { … } SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16> X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) { … } /// Trace the predicate state through the CFG, instrumenting each conditional /// branch such that misspeculation through an edge will poison the predicate /// state. /// /// Returns the list of inserted CMov instructions so that they can have their /// uses of the predicate state rewritten into proper SSA form once it is /// complete. SmallVector<MachineInstr *, 16> X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) { … } /// Compute the register class for the unfolded load. /// /// FIXME: This should probably live in X86InstrInfo, potentially by adding /// a way to unfold into a newly created vreg rather than requiring a register /// input. 
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode) { … } void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( MachineFunction &MF) { … } /// Trace the predicate state through indirect branches, instrumenting them to /// poison the state if a target is reached that does not match the expected /// target. /// /// This is designed to mitigate Spectre variant 1 attacks where an indirect /// branch is trained to predict a particular target and then mispredicts that /// target in a way that can leak data. Despite using an indirect branch, this /// is really a variant 1 style attack: it does not steer execution to an /// arbitrary or attacker controlled address, and it does not require any /// special code executing next to the victim. This attack can also be mitigated /// through retpolines, but those require either replacing indirect branches /// with conditional direct branches or lowering them through a device that /// blocks speculation. This mitigation can replace these retpoline-style /// mitigations for jump tables and other indirect branches within a function /// when variant 2 isn't a risk while allowing limited speculation. Indirect /// calls, however, cannot be mitigated through this technique without changing /// the ABI in a fundamental way. SmallVector<MachineInstr *, 16> X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( MachineFunction &MF) { … } // Returns true if the MI has EFLAGS as a register def operand and it's live, // otherwise it returns false static bool isEFLAGSDefLive(const MachineInstr &MI) { … } static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI) { … } /// Trace the predicate state through each of the blocks in the function, /// hardening everything necessary along the way. 
/// /// We call this routine once the initial predicate state has been established /// for each basic block in the function in the SSA updater. This routine traces /// it through the instructions within each basic block, and for non-returning /// blocks informs the SSA updater about the final state that lives out of the /// block. Along the way, it hardens any vulnerable instruction using the /// currently valid predicate state. We have to do these two things together /// because the SSA updater only works across blocks. Within a block, we track /// the current predicate state directly and update it as it changes. /// /// This operates in two passes over each block. First, we analyze the loads in /// the block to determine which strategy will be used to harden them: hardening /// the address or hardening the loaded value when loaded into a register /// amenable to hardening. We have to process these first because the two /// strategies may interact -- later hardening may change what strategy we wish /// to use. We also will analyze data dependencies between loads and avoid /// hardening those loads that are data dependent on a load with a hardened /// address. We also skip hardening loads already behind an LFENCE as that is /// sufficient to harden them against misspeculation. /// /// Second, we actively trace the predicate state through the block, applying /// the hardening steps we determined necessary in the first pass as we go. /// /// These two passes are applied to each basic block. We operate one block at a /// time to simplify reasoning about reachability and sequencing. void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( MachineFunction &MF) { … } /// Save EFLAGS into the returned GPR. This can in turn be restored with /// `restoreEFLAGS`. /// /// Note that LLVM can only lower very simple patterns of saved and restored /// EFLAGS registers. 
The restore should always be within the same basic block /// as the save so that no PHI nodes are inserted. unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } /// Restore EFLAGS from the provided GPR. This should be produced by /// `saveEFLAGS`. /// /// This must be done within the same basic block as the save in order to /// reliably lower. void X86SpeculativeLoadHardeningPass::restoreEFLAGS( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc, Register Reg) { … } /// Takes the current predicate state (in a register) and merges it into the /// stack pointer. The state is essentially a single bit, but we merge this in /// a way that won't form non-canonical pointers and also will be preserved /// across normal stack adjustments. void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc, unsigned PredStateReg) { … } /// Extracts the predicate state stored in the high bits of the stack pointer. unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } void X86SpeculativeLoadHardeningPass::hardenLoadAddr( MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO, SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { … } MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) { … } bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) { … } /// Harden a value in a register. /// /// This is the low-level logic to fully harden a value sitting in a register /// against leaking during speculative execution. /// /// Unlike hardening an address that is used by a load, this routine is required /// to hide *all* incoming bits in the register. 
/// /// `Reg` must be a virtual register. Currently, it is required to be a GPR no /// larger than the predicate state register. FIXME: We should support vector /// registers here by broadcasting the predicate state. /// /// The new, hardened virtual register is returned. It will have the same /// register class as `Reg`. unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } /// Harden a load by hardening the loaded value in the defined register. /// /// We can harden a non-leaking load into a register without touching the /// address by just hiding all of the loaded bits during misspeculation. We use /// an `or` instruction to do this because we set up our poison value as all /// ones. And the goal is just for the loaded bits to not be exposed to /// execution and coercing them to one is sufficient. /// /// Returns the newly hardened register. unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) { … } /// Harden a return instruction. /// /// Returns implicitly perform a load which we need to harden. Without hardening /// this load, an attacker may speculatively write over the return address to /// steer speculation of the return to an attacker controlled address. This is /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in /// this paper: /// https://people.csail.mit.edu/vlk/spectre11.pdf /// /// We can harden this by introducing an LFENCE that will delay any load of the /// return address until prior instructions have retired (and thus are not being /// speculated), or we can harden the address used by the implicit load: the /// stack pointer. /// /// If we are not using an LFENCE, hardening the stack pointer has an additional /// benefit: it allows us to pass the predicate state accumulated in this /// function back to the caller. 
In the absence of a BCBS attack on the return, /// the caller will typically be resumed and speculatively executed due to the /// Return Stack Buffer (RSB) prediction which is very accurate and has a high /// priority. It is possible that some code from the caller will be executed /// speculatively even during a BCBS-attacked return until the steering takes /// effect. Whenever this happens, the caller can recover the (poisoned) /// predicate state from the stack pointer and continue to harden loads. void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) { … } /// Trace the predicate state through a call. /// /// There are several layers of this needed to handle the full complexity of /// calls. /// /// First, we need to send the predicate state into the called function. We do /// this by merging it into the high bits of the stack pointer. /// /// For tail calls, this is all we need to do. /// /// For calls where we might return and resume the control flow, we need to /// extract the predicate state from the high bits of the stack pointer after /// control returns from the called function. /// /// We also need to verify that we intended to return to this location in the /// code. An attacker might arrange for the processor to mispredict the return /// to this valid but incorrect return address in the program rather than the /// correct one. See the paper on this attack, called "ret2spec" by the /// researchers, here: /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf /// /// The way we verify that we returned to the correct location is by preserving /// the expected return address across the call. One technique involves taking /// advantage of the red-zone to load the return address from `8(%rsp)` where it /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can /// directly save the address into a register that will be preserved across the /// call. 
We compare this intended return address against the address /// immediately following the call (the observed return address). If these /// mismatch, we have detected misspeculation and can poison our predicate /// state. void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( MachineInstr &MI) { … } /// An attacker may speculatively store over a value that is then speculatively /// loaded and used as the target of an indirect call or jump instruction. This /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described /// in this paper: /// https://people.csail.mit.edu/vlk/spectre11.pdf /// /// When this happens, the speculative execution of the call or jump will end up /// being steered to this attacker controlled address. While most such loads /// will be adequately hardened already, we want to ensure that they are /// definitively treated as needing post-load hardening. While address hardening /// is sufficient to prevent secret data from leaking to the attacker, it may /// not be sufficient to prevent an attacker from steering speculative /// execution. We forcibly unfolded all relevant loads above and so will always /// have an opportunity to post-load harden here, we just need to scan for cases /// not already flagged and add them. void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr( MachineInstr &MI, SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { … } INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() { … }