llvm/clang/include/clang/Analysis/Analyses/ThreadSafetyTIL.h

//===- ThreadSafetyTIL.h ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a simple Typed Intermediate Language, or TIL, that is used
// by the thread safety analysis (See ThreadSafety.cpp).  The TIL is intended
// to be largely independent of clang, in the hope that the analysis can be
// reused for other non-C++ languages.  All dependencies on clang/llvm should
// go in ThreadSafetyUtil.h.
//
// Thread safety analysis works by comparing mutex expressions, e.g.
//
// class A { Mutex mu; int dat GUARDED_BY(this->mu); }
// class B { A a; }
//
// void foo(B* b) {
//   (*b).a.mu.lock();     // locks (*b).a.mu
//   b->a.dat = 0;         // substitute &b->a for 'this';
//                         // requires lock on (&b->a)->mu
//   (b->a.mu).unlock();   // unlocks (b->a.mu)
// }
//
// As illustrated by the above example, clang Exprs are not well-suited to
// represent mutex expressions directly, since there is no easy way to compare
// Exprs for equivalence.  The thread safety analysis thus lowers clang Exprs
// into a simple intermediate language (IL).  The IL supports:
//
// (1) comparisons for semantic equality of expressions
// (2) SSA renaming of variables
// (3) wildcards and pattern matching over expressions
// (4) hash-based expression lookup
//
// The TIL is currently very experimental, is intended only for use within
// the thread safety analysis, and is subject to change without notice.
// After the API stabilizes and matures, it may be appropriate to make this
// more generally available to other analyses.
//
// UNDER CONSTRUCTION.  USE AT YOUR OWN RISK.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H
#define LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H

#include "clang/AST/Decl.h"
#include "clang/Analysis/Analyses/ThreadSafetyUtil.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <string>
#include <utility>

namespace clang {

class CallExpr;
class Expr;
class Stmt;

namespace threadSafety {
namespace til {

class BasicBlock;

/// Enum for the different distinct classes of SExpr
enum TIL_Opcode : unsigned char {};

/// Opcode for unary arithmetic operations.
enum TIL_UnaryOpcode : unsigned char {};

/// Opcode for binary arithmetic operations.
enum TIL_BinaryOpcode : unsigned char {};

/// Opcode for cast operations.
enum TIL_CastOpcode : unsigned char {};

const TIL_Opcode       COP_Min  =;
const TIL_Opcode       COP_Max  =;
const TIL_UnaryOpcode  UOP_Min  =;
const TIL_UnaryOpcode  UOP_Max  =;
const TIL_BinaryOpcode BOP_Min  =;
const TIL_BinaryOpcode BOP_Max  =;
const TIL_CastOpcode   CAST_Min =;
const TIL_CastOpcode   CAST_Max =;

/// Return the name of a unary opcode.
StringRef getUnaryOpcodeString(TIL_UnaryOpcode Op);

/// Return the name of a binary opcode.
StringRef getBinaryOpcodeString(TIL_BinaryOpcode Op);

/// ValueTypes are data types that can actually be held in registers.
/// All variables and expressions must have a value type.
/// Pointer types are further subdivided into the various heap-allocated
/// types, such as functions, records, etc.
/// Structured types that are passed by value (e.g. complex numbers)
/// require special handling; they use BT_ValueRef, and size ST_0.
struct ValueType {};

inline ValueType::SizeType ValueType::getSizeType(unsigned nbytes) {}

template<>
inline ValueType ValueType::getValueType<void>() {}

template<>
inline ValueType ValueType::getValueType<bool>() {}

template<>
inline ValueType ValueType::getValueType<int8_t>() {}

template<>
inline ValueType ValueType::getValueType<uint8_t>() {}

template<>
inline ValueType ValueType::getValueType<int16_t>() {}

template<>
inline ValueType ValueType::getValueType<uint16_t>() {}

template<>
inline ValueType ValueType::getValueType<int32_t>() {}

template<>
inline ValueType ValueType::getValueType<uint32_t>() {}

template<>
inline ValueType ValueType::getValueType<int64_t>() {}

template<>
inline ValueType ValueType::getValueType<uint64_t>() {}

template<>
inline ValueType ValueType::getValueType<float>() {}

template<>
inline ValueType ValueType::getValueType<double>() {}

template<>
inline ValueType ValueType::getValueType<long double>() {}

template<>
inline ValueType ValueType::getValueType<StringRef>() {}

template<>
inline ValueType ValueType::getValueType<void*>() {}

/// Base class for AST nodes in the typed intermediate language.
class SExpr {};

// Contains various helper functions for SExprs.
namespace ThreadSafetyTIL {

inline bool isTrivial(const SExpr *E) {}

} // namespace ThreadSafetyTIL

// Nodes which declare variables

/// A named variable, e.g. "x".
///
/// There are two distinct places in which a Variable can appear in the AST.
/// A variable declaration introduces a new variable, and can occur in 3 places:
///   Let-expressions:           (Let (x = t) u)
///   Functions:                 (Function (x : t) u)
///   Self-applicable functions  (SFunction (x) t)
///
/// If a variable occurs in any other location, it is a reference to an existing
/// variable declaration -- e.g. 'x' in (x * y + z). To save space, we don't
/// allocate a separate AST node for variable references; a reference is just a
/// pointer to the original declaration.
class Variable : public SExpr {};

/// Placeholder for an expression that has not yet been created.
/// Used to implement lazy copy and rewriting strategies.
class Future : public SExpr {};

/// Placeholder for expressions that cannot be represented in the TIL.
class Undefined : public SExpr {};

/// Placeholder for a wildcard that matches any other expression.
class Wildcard : public SExpr {};

template <class T> class LiteralT;

// Base class for literal values.
class Literal : public SExpr {};

// Derived class for literal values, which stores the actual value.
template<class T>
class LiteralT : public Literal {};

template <class V>
typename V::R_SExpr Literal::traverse(V &Vs, typename V::R_Ctx Ctx) {}

/// A Literal pointer to an object allocated in memory.
/// At compile time, pointer literals are represented by symbolic names.
class LiteralPtr : public SExpr {};

/// A function -- a.k.a. lambda abstraction.
/// Functions with multiple arguments are created by currying,
/// e.g. (Function (x: Int) (Function (y: Int) (Code { return x + y })))
class Function : public SExpr {};

/// A self-applicable function.
/// A self-applicable function can be applied to itself.  It's useful for
/// implementing objects and late binding.
class SFunction : public SExpr {};

/// A block of code -- e.g. the body of a function.
class Code : public SExpr {};

/// A typed, writable location in memory
class Field : public SExpr {};

/// Apply an argument to a function.
/// Note that this does not actually call the function.  Functions are curried,
/// so this returns a closure in which the first parameter has been applied.
/// Once all parameters have been applied, Call can be used to invoke the
/// function.
class Apply : public SExpr {};

/// Apply a self-argument to a self-applicable function.
class SApply : public SExpr {};

/// Project a named slot from a C++ struct or class.
class Project : public SExpr {};

/// Call a function (after all arguments have been applied).
class Call : public SExpr {};

/// Allocate memory for a new value on the heap or stack.
class Alloc : public SExpr {};

/// Load a value from memory.
class Load : public SExpr {};

/// Store a value to memory.
/// The destination is a pointer to a field, the source is the value to store.
class Store : public SExpr {};

/// If p is a reference to an array, then p[i] is a reference to the i'th
/// element of the array.
class ArrayIndex : public SExpr {};

/// Pointer arithmetic, restricted to arrays only.
/// If p is a reference to an array, then p + n, where n is an integer, is
/// a reference to a subarray.
class ArrayAdd : public SExpr {};

/// Simple arithmetic unary operations, e.g. negate and not.
/// These operations have no side-effects.
class UnaryOp : public SExpr {};

/// Simple arithmetic binary operations, e.g. +, -, etc.
/// These operations have no side effects.
class BinaryOp : public SExpr {};

/// Cast expressions.
/// Cast expressions are essentially unary operations, but we treat them
/// as a distinct AST node because they only change the type of the result.
class Cast : public SExpr {};

class SCFG;

/// Phi Node, for code in SSA form.
/// Each Phi node has an array of possible values that it can take,
/// depending on where control flow comes from.
class Phi : public SExpr {};

/// Base class for basic block terminators:  Branch, Goto, and Return.
class Terminator : public SExpr {};

/// Jump to another basic block.
/// A goto instruction is essentially a tail-recursive call into another
/// block.  In addition to the block pointer, it specifies an index into the
/// phi nodes of that block.  The index can be used to retrieve the "arguments"
/// of the call.
class Goto : public Terminator {};

/// A conditional branch to two other blocks.
/// Note that unlike Goto, Branch does not have an index.  The target blocks
/// must be child-blocks, and cannot have Phi nodes.
class Branch : public Terminator {};

/// Return from the enclosing function, passing the return value to the caller.
/// Only the exit block should end with a return statement.
class Return : public Terminator {};

inline ArrayRef<BasicBlock*> Terminator::successors() {}

/// A basic block is part of an SCFG.  It can be treated as a function in
/// continuation passing style.  A block consists of a sequence of phi nodes,
/// which are "arguments" to the function, followed by a sequence of
/// instructions.  It ends with a Terminator, which is a Branch or Goto to
/// another basic block in the same SCFG.
class BasicBlock : public SExpr {};

/// An SCFG is a control-flow graph.  It consists of a set of basic blocks,
/// each of which terminates in a branch to another basic block.  There is one
/// entry point, and one exit point.
class SCFG : public SExpr {};

/// An identifier, e.g. 'foo' or 'x'.
/// This is a pseduo-term; it will be lowered to a variable or projection.
class Identifier : public SExpr {};

/// An if-then-else expression.
/// This is a pseduo-term; it will be lowered to a branch in a CFG.
class IfThenElse : public SExpr {};

/// A let-expression,  e.g.  let x=t; u.
/// This is a pseduo-term; it will be lowered to instructions in a CFG.
class Let : public SExpr {};

const SExpr *getCanonicalVal(const SExpr *E);
SExpr* simplifyToCanonicalVal(SExpr *E);
void simplifyIncompleteArg(til::Phi *Ph);

} // namespace til
} // namespace threadSafety

} // namespace clang

#endif // LLVM_CLANG_ANALYSIS_ANALYSES_THREADSAFETYTIL_H