//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Run a basic correctness check on the IR to ensure that Safepoints - if // they've been inserted - were inserted correctly. In particular, look for use // of non-relocated values after a safepoint. It's primary use is to check the // correctness of safepoint insertion immediately after insertion, but it can // also be used to verify that later transforms have not found a way to break // safepoint semenatics. // // In its current form, this verify checks a property which is sufficient, but // not neccessary for correctness. There are some cases where an unrelocated // pointer can be used after the safepoint. Consider this example: // // a = ... // b = ... // (a',b') = safepoint(a,b) // c = cmp eq a b // br c, ..., .... // // Because it is valid to reorder 'c' above the safepoint, this is legal. In // practice, this is a somewhat uncommon transform, but CodeGenPrep does create // idioms like this. The verifier knows about these cases and avoids reporting // false positives. // //===----------------------------------------------------------------------===// #include "llvm/IR/SafepointIRVerifier.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE … usingnamespacellvm; /// This option is used for writing test cases. Instead of crashing the program /// when verification fails, report a message to the console (for FileCheck /// usage) and continue execution as if nothing happened. static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only", cl::init(false)); namespace { /// This CFG Deadness finds dead blocks and edges. Algorithm starts with a set /// of blocks unreachable from entry then propagates deadness using foldable /// conditional branches without modifying CFG. So GVN does but it changes CFG /// by splitting critical edges. In most cases passes rely on SimplifyCFG to /// clean up dead blocks, but in some cases, like verification or loop passes /// it's not possible. class CFGDeadness { … }; } // namespace static void Verify(const Function &F, const DominatorTree &DT, const CFGDeadness &CD); namespace llvm { PreservedAnalyses SafepointIRVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { … } } // namespace llvm namespace { struct SafepointIRVerifier : public FunctionPass { … }; } // namespace void llvm::verifySafepointIR(Function &F) { … } char SafepointIRVerifier::ID = …; FunctionPass *llvm::createSafepointIRVerifierPass() { … } INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir", "Safepoint IR Verifier", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir", "Safepoint IR Verifier", false, false) static bool isGCPointerType(Type *T) { … } static bool containsGCPtrType(Type *Ty) { … } // Debugging aid -- prints a [Begin, End) range of values. template<typename IteratorTy> static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) { … } /// The verifier algorithm is phrased in terms of availability. The set of /// values "available" at a given point in the control flow graph is the set of /// correctly relocated value at that point, and is a subset of the set of /// definitions dominating that point. AvailableValueSet; /// State we compute and track per basic block. struct BasicBlockState { … }; /// A given derived pointer can have multiple base pointers through phi/selects. /// This type indicates when the base pointer is exclusively constant /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is /// NonConstant. enum BaseType { … }; /// Return the baseType for Val which states whether Val is exclusively /// derived from constant/null, or not exclusively derived from constant. /// Val is exclusively derived off a constant base when all operands of phi and /// selects are derived off a constant base. static enum BaseType getBaseType(const Value *Val) { … } static bool isNotExclusivelyConstantDerived(const Value *V) { … } namespace { class InstructionVerifier; /// Builds BasicBlockState for each BB of the function. /// It can traverse function for verification and provides all required /// information. /// /// GC pointer may be in one of three states: relocated, unrelocated and /// poisoned. /// Relocated pointer may be used without any restrictions. /// Unrelocated pointer cannot be dereferenced, passed as argument to any call /// or returned. Unrelocated pointer may be safely compared against another /// unrelocated pointer or against a pointer exclusively derived from null. /// Poisoned pointers are produced when we somehow derive pointer from relocated /// and unrelocated pointers (e.g. phi, select). This pointers may be safely /// used in a very limited number of situations. Currently the only way to use /// it is comparison against constant exclusively derived from null. All /// limitations arise due to their undefined state: this pointers should be /// treated as relocated and unrelocated simultaneously. /// Rules of deriving: /// R + U = P - that's where the poisoned pointers come from /// P + X = P /// U + U = U /// R + R = R /// X + C = X /// Where "+" - any operation that somehow derive pointer, U - unrelocated, /// R - relocated and P - poisoned, C - constant, X - U or R or P or C or /// nothing (in case when "+" is unary operation). /// Deriving of pointers by itself is always safe. /// NOTE: when we are making decision on the status of instruction's result: /// a) for phi we need to check status of each input *at the end of /// corresponding predecessor BB*. /// b) for other instructions we need to check status of each input *at the /// current point*. /// /// FIXME: This works fairly well except one case /// bb1: /// p = *some GC-ptr def* /// p1 = gep p, offset /// / | /// / | /// bb2: | /// safepoint | /// \ | /// \ | /// bb3: /// p2 = phi [p, bb2] [p1, bb1] /// p3 = phi [p, bb2] [p, bb1] /// here p and p1 is unrelocated /// p2 and p3 is poisoned (though they shouldn't be) /// /// This leads to some weird results: /// cmp eq p, p2 - illegal instruction (false-positive) /// cmp eq p1, p2 - illegal instruction (false-positive) /// cmp eq p, p3 - illegal instruction (false-positive) /// cmp eq p, p1 - ok /// To fix this we need to introduce conception of generations and be able to /// check if two values belong to one generation or not. This way p2 will be /// considered to be unrelocated and no false alarm will happen. class GCPtrTracker { … }; /// It is a visitor for GCPtrTracker::verifyFunction. It decides if the /// instruction (which uses heap reference) is legal or not, given our safepoint /// semantics. class InstructionVerifier { … }; } // end anonymous namespace GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT, const CFGDeadness &CD) : … { … } BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) { … } const BasicBlockState *GCPtrTracker::getBasicBlockState( const BasicBlock *BB) const { … } bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const { … } void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker, InstructionVerifier &Verifier) { … } void GCPtrTracker::recalculateBBsStates() { … } bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB, const BasicBlockState *BBS, AvailableValueSet &Contribution) { … } void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result, const DominatorTree &DT) { … } void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS, bool ContributionChanged) { … } void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared, AvailableValueSet &Available) { … } void InstructionVerifier::verifyInstruction( const GCPtrTracker *Tracker, const Instruction &I, const AvailableValueSet &AvailableSet) { … } void InstructionVerifier::reportInvalidUse(const Value &V, const Instruction &I) { … } static void Verify(const Function &F, const DominatorTree &DT, const CFGDeadness &CD) { … }