llvm/clang/lib/Analysis/UnsafeBufferUsage.cpp

//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include <memory>
#include <optional>
#include <queue>
#include <sstream>

usingnamespacellvm;
usingnamespaceclang;
usingnamespaceast_matchers;

#ifndef NDEBUG
namespace {
class StmtDebugPrinter
    : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
public:
  std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }

  std::string VisitBinaryOperator(const BinaryOperator *BO) {
    return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
  }

  std::string VisitUnaryOperator(const UnaryOperator *UO) {
    return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
  }

  std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
    return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
  }
};

// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
static std::string getDREAncestorString(const DeclRefExpr *DRE,
                                        ASTContext &Ctx) {
  std::stringstream SS;
  const Stmt *St = DRE;
  StmtDebugPrinter StmtPriner;

  do {
    SS << StmtPriner.Visit(St);

    DynTypedNodeList StParents = Ctx.getParents(*St);

    if (StParents.size() > 1)
      return "unavailable due to multiple parents";
    if (StParents.size() == 0)
      break;
    St = StParents.begin()->get<Stmt>();
    if (St)
      SS << " ==> ";
  } while (St);
  return SS.str();
}
} // namespace
#endif /* NDEBUG */

namespace clang::ast_matchers {
// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
// except for those belonging to a different callable of "n".
class MatchDescendantVisitor
    : public RecursiveASTVisitor<MatchDescendantVisitor> {};

// Because we're dealing with raw pointers, let's define what we mean by that.
static auto hasPointerType() {}

static auto hasArrayType() {}

AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>,
              innerMatcher) {}

AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>,
              innerMatcher) {}

// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
              Handler) {}

AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
              const UnsafeBufferUsageHandler *, Handler) {}

AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {}

// Matches a `UnaryOperator` whose operator is pre-increment:
AST_MATCHER(UnaryOperator, isPreInc) {}

// Returns a matcher that matches any expression 'e' such that `innerMatcher`
// matches 'e' and 'e' is in an Unspecified Lvalue Context.
static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {}

// Returns a matcher that matches any expression `e` such that `InnerMatcher`
// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
static internal::Matcher<Stmt>
isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {}

// Returns a matcher that matches any expression 'e' such that `innerMatcher`
// matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
// 'e' isn't evaluated to an RValue). For example, consider the following code:
//    int *p = new int[4];
//    int *q = new int[4];
//    if ((p = q)) {}
//    p = q;
// The expression `p = q` in the conditional of the `if` statement
// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
// in the assignment statement is in an untyped context.
static internal::Matcher<Stmt>
isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {}

// Given a two-param std::span construct call, matches iff the call has the
// following forms:
//   1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
//   2. `std::span<T>{new T, 1}`
//   3. `std::span<T>{&var, 1}`
//   4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
//   `n`
//   5. `std::span<T>{any, 0}`
AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {}

AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {}

} // namespace clang::ast_matchers

namespace {
// Because the analysis revolves around variables and their types, we'll need to
// track uses of variables (aka DeclRefExprs).
DeclUseList;

// Convenience typedef.
FixItList;
} // namespace

namespace {
/// Gadget is an individual operation in the code that may be of interest to
/// this analysis. Each (non-abstract) subclass corresponds to a specific
/// rigid AST structure that constitutes an operation on a pointer-type object.
/// Discovery of a gadget in the code corresponds to claiming that we understand
/// what this part of code is doing well enough to potentially improve it.
/// Gadgets can be warning (immediately deserving a warning) or fixable (not
/// always deserving a warning per se, but requires our attention to identify
/// it warrants a fixit).
class Gadget {};

/// Warning gadgets correspond to unsafe code patterns that warrants
/// an immediate warning.
class WarningGadget : public Gadget {};

/// Fixable gadgets correspond to code patterns that aren't always unsafe but
/// need to be properly recognized in order to emit fixes. For example, if a raw
/// pointer-type variable is replaced by a safe C++ container, every use of such
/// variable must be carefully considered and possibly updated.
class FixableGadget : public Gadget {};

static auto toSupportedVariable() {}

FixableGadgetList;
WarningGadgetList;

/// An increment of a pointer-type value is unsafe as it may run the pointer
/// out of bounds.
class IncrementGadget : public WarningGadget {};

/// A decrement of a pointer-type value is unsafe as it may run the pointer
/// out of bounds.
class DecrementGadget : public WarningGadget {};

/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
/// it doesn't have any bounds checks for the array.
class ArraySubscriptGadget : public WarningGadget {};

/// A pointer arithmetic expression of one of the forms:
///  \code
///  ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
///  \endcode
class PointerArithmeticGadget : public WarningGadget {};

class SpanTwoParamConstructorGadget : public WarningGadget {};

/// A pointer initialization expression of the form:
///  \code
///  int *p = q;
///  \endcode
class PointerInitGadget : public FixableGadget {};

/// A pointer assignment expression of the form:
///  \code
///  p = q;
///  \endcode
/// where both `p` and `q` are pointers.
class PtrToPtrAssignmentGadget : public FixableGadget {};

/// An assignment expression of the form:
///  \code
///  ptr = array;
///  \endcode
/// where `p` is a pointer and `array` is a constant size array.
class CArrayToPtrAssignmentGadget : public FixableGadget {};

/// A call of a function or method that performs unchecked buffer operations
/// over one of its pointer parameters.
class UnsafeBufferUsageAttrGadget : public WarningGadget {};

/// A call of a constructor that performs unchecked buffer operations
/// over one of its pointer parameters, or constructs a class object that will
/// perform buffer operations that depend on the correctness of the parameters.
class UnsafeBufferUsageCtorAttrGadget : public WarningGadget {};

// Warning gadget for unsafe invocation of span::data method.
// Triggers when the pointer returned by the invocation is immediately
// cast to a larger type.

class DataInvocationGadget : public WarningGadget {};

// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
// Context (see `isInUnspecifiedLvalueContext`).
// Note here `[]` is the built-in subscript operator.
class ULCArraySubscriptGadget : public FixableGadget {};

// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
// unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
// fixit of the form `UPC(DRE.data())`.
class UPCStandalonePointerGadget : public FixableGadget {};

class PointerDereferenceGadget : public FixableGadget {};

// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
// Context (see `isInUnspecifiedPointerContext`).
// Note here `[]` is the built-in subscript operator.
class UPCAddressofArraySubscriptGadget : public FixableGadget {};
} // namespace

namespace {
// An auxiliary tracking facility for the fixit analysis. It helps connect
// declarations to its uses and make sure we've covered all uses with our
// analysis before we try to fix the declaration.
class DeclUseTracker {};
} // namespace

// Representing a pointer type expression of the form `++Ptr` in an Unspecified
// Pointer Context (UPC):
class UPCPreIncrementGadget : public FixableGadget {};

// Representing a pointer type expression of the form `Ptr += n` in an
// Unspecified Untyped Context (UUC):
class UUCAddAssignGadget : public FixableGadget {};

// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
// ptr)`:
class DerefSimplePtrArithFixableGadget : public FixableGadget {};

/// Scan the function and return a list of gadgets found with provided kits.
static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
            bool EmitSuggestions) {}

// Compares AST nodes by source locations.
template <typename NodeTy> struct CompareNode {};

struct WarningGadgetSets {};

static WarningGadgetSets
groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {}

struct FixableGadgetSets {};

static FixableGadgetSets
groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {}

bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
                                  const SourceManager &SM) {}

std::optional<FixItList>
PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {}

/// \returns fixit that adds .data() call after \DRE.
static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
                                                       const DeclRefExpr *DRE);

std::optional<FixItList>
CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {}

std::optional<FixItList>
PointerInitGadget::getFixits(const FixitStrategy &S) const {}

static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
                                     const ASTContext &Ctx) {}

std::optional<FixItList>
ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const {}

static std::optional<FixItList> // forward declaration
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);

std::optional<FixItList>
UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const {}

// FIXME: this function should be customizable through format
static StringRef getEndOfLine() {}

// Returns the text indicating that the user needs to provide input there:
std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {}

// Return the source location of the last character of the AST `Node`.
template <typename NodeTy>
static std::optional<SourceLocation>
getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
              const LangOptions &LangOpts) {}

// Return the source location just past the last character of the AST `Node`.
template <typename NodeTy>
static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
                                                const SourceManager &SM,
                                                const LangOptions &LangOpts) {}

// Return text representation of an `Expr`.
static std::optional<StringRef> getExprText(const Expr *E,
                                            const SourceManager &SM,
                                            const LangOptions &LangOpts) {}

// Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
static std::optional<StringRef> getRangeText(SourceRange SR,
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts) {}

// Returns the begin location of the identifier of the given variable
// declaration.
static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) {}

// Returns the literal text of the identifier of the given variable declaration.
static std::optional<StringRef>
getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM,
                         const LangOptions &LangOpts) {}

// We cannot fix a variable declaration if it has some other specifiers than the
// type specifier.  Because the source ranges of those specifiers could overlap
// with the source range that is being replaced using fix-its.  Especially when
// we often cannot obtain accurate source ranges of cv-qualified type
// specifiers.
// FIXME: also deal with type attributes
static bool hasUnsupportedSpecifiers(const VarDecl *VD,
                                     const SourceManager &SM) {}

// Returns the `SourceRange` of `D`.  The reason why this function exists is
// that `D->getSourceRange()` may return a range where the end location is the
// starting location of the last token.  The end location of the source range
// returned by this function is the last location of the last token.
static SourceRange getSourceRangeToTokenEnd(const Decl *D,
                                            const SourceManager &SM,
                                            const LangOptions &LangOpts) {}

// Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
// type. The text is obtained through from `TypeLoc`s.  Since `TypeLoc` does not
// have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
// :( ), `Qualifiers` of the pointee type is returned separately through the
// output parameter `QualifiersToAppend`.
static std::optional<std::string>
getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
                   const LangOptions &LangOpts,
                   std::optional<Qualifiers> *QualifiersToAppend) {}

// Returns the text of the name (with qualifiers) of a `FunctionDecl`.
static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
                                               const SourceManager &SM,
                                               const LangOptions &LangOpts) {}

// Returns the text representing a `std::span` type where the element type is
// represented by `EltTyText`.
//
// Note the optional parameter `Qualifiers`: one needs to pass qualifiers
// explicitly if the element type needs to be qualified.
static std::string
getSpanTypeText(StringRef EltTyText,
                std::optional<Qualifiers> Quals = std::nullopt) {}

std::optional<FixItList>
DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const {}

std::optional<FixItList>
PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {}

static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
                                                       const DeclRefExpr *DRE) {}

// Generates fix-its replacing an expression of the form UPC(DRE) with
// `DRE.data()`
std::optional<FixItList>
UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {}

// Generates fix-its replacing an expression of the form `&DRE[e]` with
// `&DRE.data()[e]`:
static std::optional<FixItList>
fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {}

std::optional<FixItList>
UUCAddAssignGadget::getFixits(const FixitStrategy &S) const {}

std::optional<FixItList>
UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {}

// For a non-null initializer `Init` of `T *` type, this function returns
// `FixItHint`s producing a list initializer `{Init,  S}` as a part of a fix-it
// to output stream.
// In many cases, this function cannot figure out the actual extent `S`.  It
// then will use a place holder to replace `S` to ask users to fill `S` in.  The
// initializer shall be used to initialize a variable of type `std::span<T>`.
// In some cases (e. g. constant size array) the initializer should remain
// unchanged and the function returns empty list. In case the function can't
// provide the right fixit it will return nullopt.
//
// FIXME: Support multi-level pointers
//
// Parameters:
//   `Init` a pointer to the initializer expression
//   `Ctx` a reference to the ASTContext
static std::optional<FixItList>
FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
                          const StringRef UserFillPlaceHolder) {}

#ifndef NDEBUG
#define DEBUG_NOTE_DECL_FAIL
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#endif

// For the given variable declaration with a pointer-to-T type, returns the text
// `std::span<T>`.  If it is unable to generate the text, returns
// `std::nullopt`.
static std::optional<std::string>
createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {}

// For a `VarDecl` of the form `T  * var (= Init)?`, this
// function generates fix-its that
//  1) replace `T * var` with `std::span<T> var`; and
//  2) change `Init` accordingly to a span constructor, if it exists.
//
// FIXME: support Multi-level pointers
//
// Parameters:
//   `D` a pointer the variable declaration node
//   `Ctx` a reference to the ASTContext
//   `UserFillPlaceHolder` the user-input placeholder text
// Returns:
//    the non-empty fix-it list, if fix-its are successfuly generated; empty
//    list otherwise.
static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
                                         const StringRef UserFillPlaceHolder,
                                         UnsafeBufferUsageHandler &Handler) {}

static bool hasConflictingOverload(const FunctionDecl *FD) {}

// For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
// types, this function produces fix-its to make the change self-contained.  Let
// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
// entity defined by the `FunctionDecl` after the change to the parameters.
// Fix-its produced by this function are
//   1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
//   of 'F';
//   2. Create a declaration of "NewF" next to each declaration of `F`;
//   3. Create a definition of "F" (as its' original definition is now belongs
//      to "NewF") next to its original definition.  The body of the creating
//      definition calls to "NewF".
//
// Example:
//
// void f(int *p);  // original declaration
// void f(int *p) { // original definition
//    p[5];
// }
//
// To change the parameter `p` to be of `std::span<int>` type, we
// also add overloads:
//
// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
// void f(std::span<int> p);                      // added overload decl
// void f(std::span<int> p) {     // original def where param is changed
//    p[5];
// }
// [[clang::unsafe_buffer_usage]] void f(int *p) {  // added def
//   return f(std::span(p, <# size #>));
// }
//
static std::optional<FixItList>
createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
                              const ASTContext &Ctx,
                              UnsafeBufferUsageHandler &Handler) {}

// To fix a `ParmVarDecl` to be of `std::span` type.
static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
                                  UnsafeBufferUsageHandler &Handler) {}

static FixItList fixVariableWithSpan(const VarDecl *VD,
                                     const DeclUseTracker &Tracker,
                                     ASTContext &Ctx,
                                     UnsafeBufferUsageHandler &Handler) {}

static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
                                     UnsafeBufferUsageHandler &Handler) {}

static FixItList fixVariableWithArray(const VarDecl *VD,
                                      const DeclUseTracker &Tracker,
                                      const ASTContext &Ctx,
                                      UnsafeBufferUsageHandler &Handler) {}

// TODO: we should be consistent to use `std::nullopt` to represent no-fix due
// to any unexpected problem.
static FixItList
fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
            /* The function decl under analysis */ const Decl *D,
            const DeclUseTracker &Tracker, ASTContext &Ctx,
            UnsafeBufferUsageHandler &Handler) {}

// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
// `RemoveRange` of 'h' overlaps with a macro use.
static bool overlapWithMacro(const FixItList &FixIts) {}

// Returns true iff `VD` is a parameter of the declaration `D`:
static bool isParameterOf(const VarDecl *VD, const Decl *D) {}

// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
// group mate.  A variable `v` is unfixable iff `FixItsForVariable` does not
// contain `v`.
static void eraseVarsForUnfixableGroupMates(
    std::map<const VarDecl *, FixItList> &FixItsForVariable,
    const VariableGroupsManager &VarGrpMgr) {}

// Returns the fix-its that create bounds-safe function overloads for the
// function `D`, if `D`'s parameters will be changed to safe-types through
// fix-its in `FixItsForVariable`.
//
// NOTE: In case `D`'s parameters will be changed but bounds-safe function
// overloads cannot created, the whole group that contains the parameters will
// be erased from `FixItsForVariable`.
static FixItList createFunctionOverloadsForParms(
    std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
    const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
    const FixitStrategy &S, ASTContext &Ctx,
    UnsafeBufferUsageHandler &Handler) {}

// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
static std::map<const VarDecl *, FixItList>
getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
          ASTContext &Ctx,
          /* The function decl under analysis */ const Decl *D,
          const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
          const VariableGroupsManager &VarGrpMgr) {}

template <typename VarDeclIterTy>
static FixitStrategy
getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {}

//  Manages variable groups:
class VariableGroupsManagerImpl : public VariableGroupsManager {};

void clang::checkUnsafeBufferUsage(const Decl *D,
                                   UnsafeBufferUsageHandler &Handler,
                                   bool EmitSuggestions) {}