//===- ThreadSafety.cpp ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // A intra-procedural analysis for thread safety (e.g. deadlocks and race // conditions), based off of an annotation system. // // See http://clang.llvm.org/docs/ThreadSafetyAnalysis.html // for more information. // //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/Type.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/Analyses/ThreadSafetyCommon.h" #include "clang/Analysis/Analyses/ThreadSafetyTIL.h" #include "clang/Analysis/Analyses/ThreadSafetyTraverse.h" #include "clang/Analysis/Analyses/ThreadSafetyUtil.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <functional> #include <iterator> #include <memory> #include <optional> #include <string> #include <type_traits> #include <utility> #include <vector> usingnamespaceclang; usingnamespacethreadSafety; // Key method definition ThreadSafetyHandler::~ThreadSafetyHandler() = default; /// Issue a warning about an invalid lock expression static void warnInvalidLock(ThreadSafetyHandler &Handler, const Expr *MutexExp, const NamedDecl *D, const Expr *DeclExp, StringRef Kind) { … } namespace { /// A set of CapabilityExpr objects, which are compiled from thread safety /// attributes on a function. class CapExprSet : public SmallVector<CapabilityExpr, 4> { … }; class FactManager; class FactSet; /// This is a helper class that stores a fact that is known at a /// particular point in program execution. Currently, a fact is a capability, /// along with additional information, such as where it was acquired, whether /// it is exclusive or shared, etc. /// /// FIXME: this analysis does not currently support re-entrant locking. class FactEntry : public CapabilityExpr { … }; FactID; /// FactManager manages the memory for all facts that are created during /// the analysis of a single routine. class FactManager { … }; /// A FactSet is the set of facts that are known to be true at a /// particular program point. FactSets must be small, because they are /// frequently copied, and are thus implemented as a set of indices into a /// table maintained by a FactManager. A typical FactSet only holds 1 or 2 /// locks, so we can get away with doing a linear search for lookup. Note /// that a hashtable or map is inappropriate in this case, because lookups /// may involve partial pattern matches, rather than exact matches. class FactSet { … }; class ThreadSafetyAnalyzer; } // namespace namespace clang { namespace threadSafety { class BeforeSet { … }; } // namespace threadSafety } // namespace clang namespace { class LocalVariableMap; LocalVarContext; /// A side (entry or exit) of a CFG node. enum CFGBlockSide { … }; /// CFGBlockInfo is a struct which contains all the information that is /// maintained for each block in the CFG. See LocalVariableMap for more /// information about the contexts. struct CFGBlockInfo { … }; // A LocalVariableMap maintains a map from local variables to their currently // valid definitions. It provides SSA-like functionality when traversing the // CFG. Like SSA, each definition or assignment to a variable is assigned a // unique name (an integer), which acts as the SSA name for that definition. // The total set of names is shared among all CFG basic blocks. // Unlike SSA, we do not rewrite expressions to replace local variables declrefs // with their SSA-names. Instead, we compute a Context for each point in the // code, which maps local variables to the appropriate SSA-name. This map // changes with each assignment. // // The map is computed in a single pass over the CFG. Subsequent analyses can // then query the map to find the appropriate Context for a statement, and use // that Context to look up the definitions of variables. class LocalVariableMap { … }; } // namespace // This has to be defined after LocalVariableMap. CFGBlockInfo CFGBlockInfo::getEmptyBlockInfo(LocalVariableMap &M) { … } namespace { /// Visitor which builds a LocalVariableMap class VarMapBuilder : public ConstStmtVisitor<VarMapBuilder> { … }; } // namespace // Add new local variables to the variable map void VarMapBuilder::VisitDeclStmt(const DeclStmt *S) { … } // Update local variable definitions in variable map void VarMapBuilder::VisitBinaryOperator(const BinaryOperator *BO) { … } // Computes the intersection of two contexts. The intersection is the // set of variables which have the same definition in both contexts; // variables with different definitions are discarded. LocalVariableMap::Context LocalVariableMap::intersectContexts(Context C1, Context C2) { … } // For every variable in C, create a new variable that refers to the // definition in C. Return a new context that contains these new variables. // (We use this for a naive implementation of SSA on loop back-edges.) LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) { … } // This routine also takes the intersection of C1 and C2, but it does so by // altering the VarDefinitions. C1 must be the result of an earlier call to // createReferenceContext. void LocalVariableMap::intersectBackEdge(Context C1, Context C2) { … } // Traverse the CFG in topological order, so all predecessors of a block // (excluding back-edges) are visited before the block itself. At // each point in the code, we calculate a Context, which holds the set of // variable definitions which are visible at that point in execution. // Visible variables are mapped to their definitions using an array that // contains all definitions. // // At join points in the CFG, the set is computed as the intersection of // the incoming sets along each edge, E.g. // // { Context | VarDefinitions } // int x = 0; { x -> x1 | x1 = 0 } // int y = 0; { x -> x1, y -> y1 | y1 = 0, x1 = 0 } // if (b) x = 1; { x -> x2, y -> y1 | x2 = 1, y1 = 0, ... } // else x = 2; { x -> x3, y -> y1 | x3 = 2, x2 = 1, ... } // ... { y -> y1 (x is unknown) | x3 = 2, x2 = 1, ... } // // This is essentially a simpler and more naive version of the standard SSA // algorithm. Those definitions that remain in the intersection are from blocks // that strictly dominate the current block. We do not bother to insert proper // phi nodes, because they are not used in our analysis; instead, wherever // a phi node would be required, we simply remove that definition from the // context (E.g. x above). // // The initial traversal does not capture back-edges, so those need to be // handled on a separate pass. Whenever the first pass encounters an // incoming back edge, it duplicates the context, creating new definitions // that refer back to the originals. (These correspond to places where SSA // might have to insert a phi node.) On the second pass, these definitions are // set to NULL if the variable has changed on the back-edge (i.e. a phi // node was actually required.) E.g. // // { Context | VarDefinitions } // int x = 0, y = 0; { x -> x1, y -> y1 | y1 = 0, x1 = 0 } // while (b) { x -> x2, y -> y1 | [1st:] x2=x1; [2nd:] x2=NULL; } // x = x+1; { x -> x3, y -> y1 | x3 = x2 + 1, ... } // ... { y -> y1 | x3 = 2, x2 = 1, ... } void LocalVariableMap::traverseCFG(CFG *CFGraph, const PostOrderCFGView *SortedGraph, std::vector<CFGBlockInfo> &BlockInfo) { … } /// Find the appropriate source locations to use when producing diagnostics for /// each block in the CFG. static void findBlockLocations(CFG *CFGraph, const PostOrderCFGView *SortedGraph, std::vector<CFGBlockInfo> &BlockInfo) { … } namespace { class LockableFactEntry : public FactEntry { … }; class ScopedLockableFactEntry : public FactEntry { … }; /// Class which implements the core thread safety analysis routines. class ThreadSafetyAnalyzer { … }; } // namespace /// Process acquired_before and acquired_after attributes on Vd. BeforeSet::BeforeInfo* BeforeSet::insertAttrExprs(const ValueDecl* Vd, ThreadSafetyAnalyzer& Analyzer) { … } BeforeSet::BeforeInfo * BeforeSet::getBeforeInfoForDecl(const ValueDecl *Vd, ThreadSafetyAnalyzer &Analyzer) { … } /// Return true if any mutexes in FSet are in the acquired_before set of Vd. void BeforeSet::checkBeforeAfter(const ValueDecl* StartVd, const FactSet& FSet, ThreadSafetyAnalyzer& Analyzer, SourceLocation Loc, StringRef CapKind) { … } /// Gets the value decl pointer from DeclRefExprs or MemberExprs. static const ValueDecl *getValueDecl(const Expr *Exp) { … } namespace { template <typename Ty> class has_arg_iterator_range { … }; } // namespace bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { … } /// Add a new lock to the lockset, warning if the lock is already there. /// \param ReqAttr -- true if this is part of an initial Requires attribute. void ThreadSafetyAnalyzer::addLock(FactSet &FSet, std::unique_ptr<FactEntry> Entry, bool ReqAttr) { … } /// Remove a lock from the lockset, warning if the lock is not there. /// \param UnlockLoc The source location of the unlock (only used in error msg) void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, const CapabilityExpr &Cp, SourceLocation UnlockLoc, bool FullyRemove, LockKind ReceivedKind) { … } /// Extract the list of mutexIDs from the attribute on an expression, /// and push them onto Mtxs, discarding any duplicates. template <typename AttrType> void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, const NamedDecl *D, til::SExpr *Self) { … } /// Extract the list of mutexIDs from a trylock attribute. If the /// trylock applies to the given edge, then push them onto Mtxs, discarding /// any duplicates. template <class AttrType> void ThreadSafetyAnalyzer::getMutexIDs(CapExprSet &Mtxs, AttrType *Attr, const Expr *Exp, const NamedDecl *D, const CFGBlock *PredBlock, const CFGBlock *CurrBlock, Expr *BrE, bool Neg) { … } static bool getStaticBooleanValue(Expr *E, bool &TCond) { … } // If Cond can be traced back to a function call, return the call expression. // The negate variable should be called with false, and will be set to true // if the function call is negated, e.g. if (!mu.tryLock(...)) const CallExpr* ThreadSafetyAnalyzer::getTrylockCallExpr(const Stmt *Cond, LocalVarContext C, bool &Negate) { … } /// Find the lockset that holds on the edge between PredBlock /// and CurrBlock. The edge set is the exit set of PredBlock (passed /// as the ExitSet parameter) plus any trylocks, which are conditionally held. void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, const FactSet &ExitSet, const CFGBlock *PredBlock, const CFGBlock *CurrBlock) { … } namespace { /// We use this class to visit different types of expressions in /// CFGBlocks, and build up the lockset. /// An expression may cause us to add or remove locks from the lockset, or else /// output error messages related to missing locks. /// FIXME: In future, we may be able to not inherit from a visitor. class BuildLockset : public ConstStmtVisitor<BuildLockset> { … }; } // namespace /// Warn if the LSet does not contain a lock sufficient to protect access /// of at least the passed in AccessKind. void ThreadSafetyAnalyzer::warnIfMutexNotHeld( const FactSet &FSet, const NamedDecl *D, const Expr *Exp, AccessKind AK, Expr *MutexExp, ProtectedOperationKind POK, til::LiteralPtr *Self, SourceLocation Loc) { … } /// Warn if the LSet contains the given lock. void ThreadSafetyAnalyzer::warnIfMutexHeld(const FactSet &FSet, const NamedDecl *D, const Expr *Exp, Expr *MutexExp, til::LiteralPtr *Self, SourceLocation Loc) { … } /// Checks guarded_by and pt_guarded_by attributes. /// Whenever we identify an access (read or write) to a DeclRefExpr that is /// marked with guarded_by, we must ensure the appropriate mutexes are held. /// Similarly, we check if the access is to an expression that dereferences /// a pointer marked with pt_guarded_by. void ThreadSafetyAnalyzer::checkAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, ProtectedOperationKind POK) { … } /// Checks pt_guarded_by and pt_guarded_var attributes. /// POK is the same operationKind that was passed to checkAccess. void ThreadSafetyAnalyzer::checkPtAccess(const FactSet &FSet, const Expr *Exp, AccessKind AK, ProtectedOperationKind POK) { … } /// Process a function call, method call, constructor call, /// or destructor call. This involves looking at the attributes on the /// corresponding function/method/constructor/destructor, issuing warnings, /// and updating the locksets accordingly. /// /// FIXME: For classes annotated with one of the guarded annotations, we need /// to treat const method calls as reads and non-const method calls as writes, /// and check that the appropriate locks are held. Non-const method calls with /// the same signature as const method calls can be also treated as reads. /// /// \param Exp The call expression. /// \param D The callee declaration. /// \param Self If \p Exp = nullptr, the implicit this argument or the argument /// of an implicitly called cleanup function. /// \param Loc If \p Exp = nullptr, the location. void BuildLockset::handleCall(const Expr *Exp, const NamedDecl *D, til::LiteralPtr *Self, SourceLocation Loc) { … } /// For unary operations which read and write a variable, we need to /// check whether we hold any required mutexes. Reads are checked in /// VisitCastExpr. void BuildLockset::VisitUnaryOperator(const UnaryOperator *UO) { … } /// For binary operations which assign to a variable (writes), we need to check /// whether we hold any required mutexes. /// FIXME: Deal with non-primitive types. void BuildLockset::VisitBinaryOperator(const BinaryOperator *BO) { … } /// Whenever we do an LValue to Rvalue cast, we are reading a variable and /// need to ensure we hold any required mutexes. /// FIXME: Deal with non-primitive types. void BuildLockset::VisitCastExpr(const CastExpr *CE) { … } void BuildLockset::examineArguments(const FunctionDecl *FD, CallExpr::const_arg_iterator ArgBegin, CallExpr::const_arg_iterator ArgEnd, bool SkipFirstParam) { … } void BuildLockset::VisitCallExpr(const CallExpr *Exp) { … } void BuildLockset::VisitCXXConstructExpr(const CXXConstructExpr *Exp) { … } static const Expr *UnpackConstruction(const Expr *E) { … } void BuildLockset::VisitDeclStmt(const DeclStmt *S) { … } void BuildLockset::VisitMaterializeTemporaryExpr( const MaterializeTemporaryExpr *Exp) { … } void BuildLockset::VisitReturnStmt(const ReturnStmt *S) { … } /// Given two facts merging on a join point, possibly warn and decide whether to /// keep or replace. /// /// \param CanModify Whether we can replace \p A by \p B. /// \return false if we should keep \p A, true if we should take \p B. bool ThreadSafetyAnalyzer::join(const FactEntry &A, const FactEntry &B, bool CanModify) { … } /// Compute the intersection of two locksets and issue warnings for any /// locks in the symmetric difference. /// /// This function is used at a merge point in the CFG when comparing the lockset /// of each branch being merged. For example, given the following sequence: /// A; if () then B; else C; D; we need to check that the lockset after B and C /// are the same. In the event of a difference, we use the intersection of these /// two locksets at the start of D. /// /// \param EntrySet A lockset for entry into a (possibly new) block. /// \param ExitSet The lockset on exiting a preceding block. /// \param JoinLoc The location of the join point for error reporting /// \param EntryLEK The warning if a mutex is missing from \p EntrySet. /// \param ExitLEK The warning if a mutex is missing from \p ExitSet. void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &EntrySet, const FactSet &ExitSet, SourceLocation JoinLoc, LockErrorKind EntryLEK, LockErrorKind ExitLEK) { … } // Return true if block B never continues to its successors. static bool neverReturns(const CFGBlock *B) { … } /// Check a function's CFG for thread-safety violations. /// /// We traverse the blocks in the CFG, compute the set of mutexes that are held /// at the end of each block, and issue warnings for thread safety violations. /// Each block in the CFG is traversed exactly once. void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { … } /// Check a function's CFG for thread-safety violations. /// /// We traverse the blocks in the CFG, compute the set of mutexes that are held /// at the end of each block, and issue warnings for thread safety violations. /// Each block in the CFG is traversed exactly once. void threadSafety::runThreadSafetyAnalysis(AnalysisDeclContext &AC, ThreadSafetyHandler &Handler, BeforeSet **BSet) { … } void threadSafety::threadSafetyCleanup(BeforeSet *Cache) { … } /// Helper function that returns a LockKind required for the given level /// of access. LockKind threadSafety::getLockKindFromAccessKind(AccessKind AK) { … }