//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// /// Provide a pass which mitigates speculative execution attacks which operate /// by speculating incorrectly past some predicate (a type check, bounds check, /// or other condition) to reach a load with invalid inputs and leak the data /// accessed by that load using a side channel out of the speculative domain. /// /// For details on the attacks, see the first variant in both the Project Zero /// writeup and the Spectre paper: /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html /// https://spectreattack.com/spectre.pdf /// //===----------------------------------------------------------------------===// #include "X86.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include 
"llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <iterator> #include <optional> #include <utility> using namespace llvm; #define PASS_KEY … #define DEBUG_TYPE … STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced"); STATISTIC(NumBranchesUntraced, "Number of branches unable to trace"); STATISTIC(NumAddrRegsHardened, "Number of address mode used registers hardaned"); STATISTIC(NumPostLoadRegsHardened, "Number of post-load register values hardened"); STATISTIC(NumCallsOrJumpsHardened, "Number of calls or jumps requiring extra hardening"); STATISTIC(NumInstsInserted, "Number of instructions inserted"); STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted"); static cl::opt<bool> EnableSpeculativeLoadHardening( "x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden); static cl::opt<bool> HardenEdgesWithLFENCE( PASS_KEY "-lfence", cl::desc( "Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden); static cl::opt<bool> EnablePostLoadHardening( PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. 
This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden); static cl::opt<bool> FenceCallAndRet( PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden); static cl::opt<bool> HardenInterprocedurally( PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden); static cl::opt<bool> HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden); static cl::opt<bool> HardenIndirectCallsAndJumps( PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden); namespace { class X86SpeculativeLoadHardeningPass : public MachineFunctionPass { … }; } // end anonymous namespace char X86SpeculativeLoadHardeningPass::ID = …; void X86SpeculativeLoadHardeningPass::getAnalysisUsage( AnalysisUsage &AU) const { … } static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII) { … } /// Removing duplicate PHI operands to leave the PHI in a canonical and /// predictable form. /// /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR /// isn't what you might expect. We may have multiple entries in PHI nodes for /// a single predecessor. This makes CFG-updating extremely complex, so here we /// simplify all PHI nodes to a model even simpler than the IR's model: exactly /// one entry per predecessor, regardless of how many edges there are. 
static void canonicalizePHIOperands(MachineFunction &MF) { … } /// Helper to scan a function for loads vulnerable to misspeculation that we /// want to harden. /// /// We use this to avoid making changes to functions where there is nothing we /// need to do to harden against misspeculation. static bool hasVulnerableLoad(MachineFunction &MF) { … } bool X86SpeculativeLoadHardeningPass::runOnMachineFunction( MachineFunction &MF) { … } /// Implements the naive hardening approach of putting an LFENCE after every /// potentially mis-predicted control flow construct. /// /// We include this as an alternative mostly for the purpose of comparison. The /// performance impact of this is expected to be extremely severe and not /// practical for any real-world users. void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE( MachineFunction &MF) { … } SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16> X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) { … } /// Trace the predicate state through the CFG, instrumenting each conditional /// branch such that misspeculation through an edge will poison the predicate /// state. /// /// Returns the list of inserted CMov instructions so that they can have their /// uses of the predicate state rewritten into proper SSA form once it is /// complete. SmallVector<MachineInstr *, 16> X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG( MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) { … } /// Compute the register class for the unfolded load. /// /// FIXME: This should probably live in X86InstrInfo, potentially by adding /// a way to unfold into a newly created vreg rather than requiring a register /// input. 
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode) { … } void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( MachineFunction &MF) { … } /// Trace the predicate state through indirect branches, instrumenting them to /// poison the state if a target is reached that does not match the expected /// target. /// /// This is designed to mitigate Spectre variant 1 attacks where an indirect /// branch is trained to predict a particular target and then mispredicts that /// target in a way that can leak data. Despite using an indirect branch, this /// is really a variant 1 style attack: it does not steer execution to an /// arbitrary or attacker controlled address, and it does not require any /// special code executing next to the victim. This attack can also be mitigated /// through retpolines, but those require either replacing indirect branches /// with conditional direct branches or lowering them through a device that /// blocks speculation. This mitigation can replace these retpoline-style /// mitigations for jump tables and other indirect branches within a function /// when variant 2 isn't a risk while allowing limited speculation. Indirect /// calls, however, cannot be mitigated through this technique without changing /// the ABI in a fundamental way. SmallVector<MachineInstr *, 16> X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( MachineFunction &MF) { … } // Returns true if the MI has EFLAGS as a register def operand and it's live, // otherwise it returns false static bool isEFLAGSDefLive(const MachineInstr &MI) { … } static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI) { … } /// Trace the predicate state through each of the blocks in the function, /// hardening everything necessary along the way. 
/// /// We call this routine once the initial predicate state has been established /// for each basic block in the function in the SSA updater. This routine traces /// it through the instructions within each basic block, and for non-returning /// blocks informs the SSA updater about the final state that lives out of the /// block. Along the way, it hardens any vulnerable instruction using the /// currently valid predicate state. We have to do these two things together /// because the SSA updater only works across blocks. Within a block, we track /// the current predicate state directly and update it as it changes. /// /// This operates in two passes over each block. First, we analyze the loads in /// the block to determine which strategy will be used to harden them: hardening /// the address or hardening the loaded value when loaded into a register /// amenable to hardening. We have to process these first because the two /// strategies may interact -- later hardening may change what strategy we wish /// to use. We also will analyze data dependencies between loads and avoid /// hardening those loads that are data dependent on a load with a hardened /// address. We also skip hardening loads already behind an LFENCE as that is /// sufficient to harden them against misspeculation. /// /// Second, we actively trace the predicate state through the block, applying /// the hardening steps we determined necessary in the first pass as we go. /// /// These two passes are applied to each basic block. We operate one block at a /// time to simplify reasoning about reachability and sequencing. void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( MachineFunction &MF) { … } /// Save EFLAGS into the returned GPR. This can in turn be restored with /// `restoreEFLAGS`. /// /// Note that LLVM can only lower very simple patterns of saved and restored /// EFLAGS registers. 
The restore should always be within the same basic block /// as the save so that no PHI nodes are inserted. unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } /// Restore EFLAGS from the provided GPR. This should be produced by /// `saveEFLAGS`. /// /// This must be done within the same basic block as the save in order to /// reliably lower. void X86SpeculativeLoadHardeningPass::restoreEFLAGS( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc, Register Reg) { … } /// Takes the current predicate state (in a register) and merges it into the /// stack pointer. The state is essentially a single bit, but we merge this in /// a way that won't form non-canonical pointers and also will be preserved /// across normal stack adjustments. void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc, unsigned PredStateReg) { … } /// Extracts the predicate state stored in the high bits of the stack pointer. unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } void X86SpeculativeLoadHardeningPass::hardenLoadAddr( MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO, SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { … } MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) { … } bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) { … } /// Harden a value in a register. /// /// This is the low-level logic to fully harden a value sitting in a register /// against leaking during speculative execution. /// /// Unlike hardening an address that is used by a load, this routine is required /// to hide *all* incoming bits in the register. 
/// /// `Reg` must be a virtual register. Currently, it is required to be a GPR no /// larger than the predicate state register. FIXME: We should support vector /// registers here by broadcasting the predicate state. /// /// The new, hardened virtual register is returned. It will have the same /// register class as `Reg`. unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister( Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc) { … } /// Harden a load by hardening the loaded value in the defined register. /// /// We can harden a non-leaking load into a register without touching the /// address by just hiding all of the loaded bits during misspeculation. We use /// an `or` instruction to do this because we set up our poison value as all /// ones. And the goal is just for the loaded bits to not be exposed to /// execution and coercing them to one is sufficient. /// /// Returns the newly hardened register. unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) { … } /// Harden a return instruction. /// /// Returns implicitly perform a load which we need to harden. Without hardening /// this load, an attacker may speculatively write over the return address to /// steer speculation of the return to an attacker controlled address. This is /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in /// this paper: /// https://people.csail.mit.edu/vlk/spectre11.pdf /// /// We can harden this by introducing an LFENCE that will delay any load of the /// return address until prior instructions have retired (and thus are not being /// speculated), or we can harden the address used by the implicit load: the /// stack pointer. /// /// If we are not using an LFENCE, hardening the stack pointer has an additional /// benefit: it allows us to pass the predicate state accumulated in this /// function back to the caller. 
In the absence of a BCBS attack on the return, /// the caller will typically be resumed and speculatively executed due to the /// Return Stack Buffer (RSB) prediction which is very accurate and has a high /// priority. It is possible that some code from the caller will be executed /// speculatively even during a BCBS-attacked return until the steering takes /// effect. Whenever this happens, the caller can recover the (poisoned) /// predicate state from the stack pointer and continue to harden loads. void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) { … } /// Trace the predicate state through a call. /// /// There are several layers of this needed to handle the full complexity of /// calls. /// /// First, we need to send the predicate state into the called function. We do /// this by merging it into the high bits of the stack pointer. /// /// For tail calls, this is all we need to do. /// /// For calls where we might return and resume the control flow, we need to /// extract the predicate state from the high bits of the stack pointer after /// control returns from the called function. /// /// We also need to verify that we intended to return to this location in the /// code. An attacker might arrange for the processor to mispredict the return /// to this valid but incorrect return address in the program rather than the /// correct one. See the paper on this attack, called "ret2spec" by the /// researchers, here: /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf /// /// The way we verify that we returned to the correct location is by preserving /// the expected return address across the call. One technique involves taking /// advantage of the red-zone to load the return address from `8(%rsp)` where it /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can /// directly save the address into a register that will be preserved across the /// call. 
We compare this intended return address against the address /// immediately following the call (the observed return address). If these /// mismatch, we have detected misspeculation and can poison our predicate /// state. void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall( MachineInstr &MI) { … } /// An attacker may speculatively store over a value that is then speculatively /// loaded and used as the target of an indirect call or jump instruction. This /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described /// in this paper: /// https://people.csail.mit.edu/vlk/spectre11.pdf /// /// When this happens, the speculative execution of the call or jump will end up /// being steered to this attacker controlled address. While most such loads /// will be adequately hardened already, we want to ensure that they are /// definitively treated as needing post-load hardening. While address hardening /// is sufficient to prevent secret data from leaking to the attacker, it may /// not be sufficient to prevent an attacker from steering speculative /// execution. We forcibly unfolded all relevant loads above and so will always /// have an opportunity to post-load harden here, we just need to scan for cases /// not already flagged and add them. void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr( MachineInstr &MI, SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { … } INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() { … }