//===- ThreadSafetyTIL.h ----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines a simple Typed Intermediate Language, or TIL, that is used // by the thread safety analysis (See ThreadSafety.cpp). The TIL is intended // to be largely independent of clang, in the hope that the analysis can be // reused for other non-C++ languages. All dependencies on clang/llvm should // go in ThreadSafetyUtil.h. // // Thread safety analysis works by comparing mutex expressions, e.g. // // class A { Mutex mu; int dat GUARDED_BY(this->mu); } // class B { A a; } // // void foo(B* b) { // (*b).a.mu.lock(); // locks (*b).a.mu // b->a.dat = 0; // substitute &b->a for 'this'; // // requires lock on (&b->a)->mu // (b->a.mu).unlock(); // unlocks (b->a.mu) // } // // As illustrated by the above example, clang Exprs are not well-suited to // represent mutex expressions directly, since there is no easy way to compare // Exprs for equivalence. The thread safety analysis thus lowers clang Exprs // into a simple intermediate language (IL). The IL supports: // // (1) comparisons for semantic equality of expressions // (2) SSA renaming of variables // (3) wildcards and pattern matching over expressions // (4) hash-based expression lookup // // The TIL is currently very experimental, is intended only for use within // the thread safety analysis, and is subject to change without notice. // After the API stabilizes and matures, it may be appropriate to make this // more generally available to other analyses. // // UNDER CONSTRUCTION. USE AT YOUR OWN RISK. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H #define LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H #include "clang/AST/Decl.h" #include "clang/Analysis/Analyses/ThreadSafetyUtil.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> #include <iterator> #include <optional> #include <string> #include <utility> namespace clang { class CallExpr; class Expr; class Stmt; namespace threadSafety { namespace til { class BasicBlock; /// Enum for the different distinct classes of SExpr enum TIL_Opcode : unsigned char { … }; /// Opcode for unary arithmetic operations. enum TIL_UnaryOpcode : unsigned char { … }; /// Opcode for binary arithmetic operations. enum TIL_BinaryOpcode : unsigned char { … }; /// Opcode for cast operations. enum TIL_CastOpcode : unsigned char { … }; const TIL_Opcode COP_Min = …; const TIL_Opcode COP_Max = …; const TIL_UnaryOpcode UOP_Min = …; const TIL_UnaryOpcode UOP_Max = …; const TIL_BinaryOpcode BOP_Min = …; const TIL_BinaryOpcode BOP_Max = …; const TIL_CastOpcode CAST_Min = …; const TIL_CastOpcode CAST_Max = …; /// Return the name of a unary opcode. StringRef getUnaryOpcodeString(TIL_UnaryOpcode Op); /// Return the name of a binary opcode. StringRef getBinaryOpcodeString(TIL_BinaryOpcode Op); /// ValueTypes are data types that can actually be held in registers. /// All variables and expressions must have a value type. /// Pointer types are further subdivided into the various heap-allocated /// types, such as functions, records, etc. /// Structured types that are passed by value (e.g. complex numbers) /// require special handling; they use BT_ValueRef, and size ST_0. struct ValueType { … }; inline ValueType::SizeType ValueType::getSizeType(unsigned nbytes) { … } template<> inline ValueType ValueType::getValueType<void>() { … } template<> inline ValueType ValueType::getValueType<bool>() { … } template<> inline ValueType ValueType::getValueType<int8_t>() { … } template<> inline ValueType ValueType::getValueType<uint8_t>() { … } template<> inline ValueType ValueType::getValueType<int16_t>() { … } template<> inline ValueType ValueType::getValueType<uint16_t>() { … } template<> inline ValueType ValueType::getValueType<int32_t>() { … } template<> inline ValueType ValueType::getValueType<uint32_t>() { … } template<> inline ValueType ValueType::getValueType<int64_t>() { … } template<> inline ValueType ValueType::getValueType<uint64_t>() { … } template<> inline ValueType ValueType::getValueType<float>() { … } template<> inline ValueType ValueType::getValueType<double>() { … } template<> inline ValueType ValueType::getValueType<long double>() { … } template<> inline ValueType ValueType::getValueType<StringRef>() { … } template<> inline ValueType ValueType::getValueType<void*>() { … } /// Base class for AST nodes in the typed intermediate language. class SExpr { … }; // Contains various helper functions for SExprs. namespace ThreadSafetyTIL { inline bool isTrivial(const SExpr *E) { … } } // namespace ThreadSafetyTIL // Nodes which declare variables /// A named variable, e.g. "x". /// /// There are two distinct places in which a Variable can appear in the AST. /// A variable declaration introduces a new variable, and can occur in 3 places: /// Let-expressions: (Let (x = t) u) /// Functions: (Function (x : t) u) /// Self-applicable functions (SFunction (x) t) /// /// If a variable occurs in any other location, it is a reference to an existing /// variable declaration -- e.g. 'x' in (x * y + z). To save space, we don't /// allocate a separate AST node for variable references; a reference is just a /// pointer to the original declaration. class Variable : public SExpr { … }; /// Placeholder for an expression that has not yet been created. /// Used to implement lazy copy and rewriting strategies. class Future : public SExpr { … }; /// Placeholder for expressions that cannot be represented in the TIL. class Undefined : public SExpr { … }; /// Placeholder for a wildcard that matches any other expression. class Wildcard : public SExpr { … }; template <class T> class LiteralT; // Base class for literal values. class Literal : public SExpr { … }; // Derived class for literal values, which stores the actual value. template<class T> class LiteralT : public Literal { … }; template <class V> typename V::R_SExpr Literal::traverse(V &Vs, typename V::R_Ctx Ctx) { … } /// A Literal pointer to an object allocated in memory. /// At compile time, pointer literals are represented by symbolic names. class LiteralPtr : public SExpr { … }; /// A function -- a.k.a. lambda abstraction. /// Functions with multiple arguments are created by currying, /// e.g. (Function (x: Int) (Function (y: Int) (Code { return x + y }))) class Function : public SExpr { … }; /// A self-applicable function. /// A self-applicable function can be applied to itself. It's useful for /// implementing objects and late binding. class SFunction : public SExpr { … }; /// A block of code -- e.g. the body of a function. class Code : public SExpr { … }; /// A typed, writable location in memory class Field : public SExpr { … }; /// Apply an argument to a function. /// Note that this does not actually call the function. Functions are curried, /// so this returns a closure in which the first parameter has been applied. /// Once all parameters have been applied, Call can be used to invoke the /// function. class Apply : public SExpr { … }; /// Apply a self-argument to a self-applicable function. class SApply : public SExpr { … }; /// Project a named slot from a C++ struct or class. class Project : public SExpr { … }; /// Call a function (after all arguments have been applied). class Call : public SExpr { … }; /// Allocate memory for a new value on the heap or stack. class Alloc : public SExpr { … }; /// Load a value from memory. class Load : public SExpr { … }; /// Store a value to memory. /// The destination is a pointer to a field, the source is the value to store. class Store : public SExpr { … }; /// If p is a reference to an array, then p[i] is a reference to the i'th /// element of the array. class ArrayIndex : public SExpr { … }; /// Pointer arithmetic, restricted to arrays only. /// If p is a reference to an array, then p + n, where n is an integer, is /// a reference to a subarray. class ArrayAdd : public SExpr { … }; /// Simple arithmetic unary operations, e.g. negate and not. /// These operations have no side-effects. class UnaryOp : public SExpr { … }; /// Simple arithmetic binary operations, e.g. +, -, etc. /// These operations have no side effects. class BinaryOp : public SExpr { … }; /// Cast expressions. /// Cast expressions are essentially unary operations, but we treat them /// as a distinct AST node because they only change the type of the result. class Cast : public SExpr { … }; class SCFG; /// Phi Node, for code in SSA form. /// Each Phi node has an array of possible values that it can take, /// depending on where control flow comes from. class Phi : public SExpr { … }; /// Base class for basic block terminators: Branch, Goto, and Return. class Terminator : public SExpr { … }; /// Jump to another basic block. /// A goto instruction is essentially a tail-recursive call into another /// block. In addition to the block pointer, it specifies an index into the /// phi nodes of that block. The index can be used to retrieve the "arguments" /// of the call. class Goto : public Terminator { … }; /// A conditional branch to two other blocks. /// Note that unlike Goto, Branch does not have an index. The target blocks /// must be child-blocks, and cannot have Phi nodes. class Branch : public Terminator { … }; /// Return from the enclosing function, passing the return value to the caller. /// Only the exit block should end with a return statement. class Return : public Terminator { … }; inline ArrayRef<BasicBlock*> Terminator::successors() { … } /// A basic block is part of an SCFG. It can be treated as a function in /// continuation passing style. A block consists of a sequence of phi nodes, /// which are "arguments" to the function, followed by a sequence of /// instructions. It ends with a Terminator, which is a Branch or Goto to /// another basic block in the same SCFG. class BasicBlock : public SExpr { … }; /// An SCFG is a control-flow graph. It consists of a set of basic blocks, /// each of which terminates in a branch to another basic block. There is one /// entry point, and one exit point. class SCFG : public SExpr { … }; /// An identifier, e.g. 'foo' or 'x'. /// This is a pseduo-term; it will be lowered to a variable or projection. class Identifier : public SExpr { … }; /// An if-then-else expression. /// This is a pseduo-term; it will be lowered to a branch in a CFG. class IfThenElse : public SExpr { … }; /// A let-expression, e.g. let x=t; u. /// This is a pseduo-term; it will be lowered to instructions in a CFG. class Let : public SExpr { … }; const SExpr *getCanonicalVal(const SExpr *E); SExpr* simplifyToCanonicalVal(SExpr *E); void simplifyIncompleteArg(til::Phi *Ph); } // namespace til } // namespace threadSafety } // namespace clang #endif // LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H