llvm/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp

//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
/// analysis.
///
/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
/// analysis framework to be used by clients to help detect application-specific
/// issues within their own code.
///
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
/// Argument and return value labels are passed through TLS variables
/// __dfsan_arg_tls and __dfsan_retval_tls.
///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
///
/// +--------------------+ 0x800000000000 (top of memory)
/// |    application 3   |
/// +--------------------+ 0x700000000000
/// |      invalid       |
/// +--------------------+ 0x610000000000
/// |      origin 1      |
/// +--------------------+ 0x600000000000
/// |    application 2   |
/// +--------------------+ 0x510000000000
/// |      shadow 1      |
/// +--------------------+ 0x500000000000
/// |      invalid       |
/// +--------------------+ 0x400000000000
/// |      origin 3      |
/// +--------------------+ 0x300000000000
/// |      shadow 3      |
/// +--------------------+ 0x200000000000
/// |      origin 2      |
/// +--------------------+ 0x110000000000
/// |      invalid       |
/// +--------------------+ 0x100000000000
/// |      shadow 2      |
/// +--------------------+ 0x010000000000
/// |    application 1   |
/// +--------------------+ 0x000000000000
///
/// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
/// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
///
/// For more information, please refer to the design document:
/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

usingnamespacellvm;

// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment =;

static const Align MinOriginAlignment =;

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize =;
static const unsigned RetvalTLSSize =;

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct.  For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16.  This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations for uninstrumented functions are
// "functional" and "discard", which are described below under
// DataFlowSanitizer::WrapperKind.
// Functions will often be labelled with both "uninstrumented" and one of
// "functional" or "discard". This will leave the function unchanged by this
// pass, and create a wrapper function that will call the original.
//
// Instrumented functions can also be annotated as "force_zero_labels", which
// will make all shadow and return values set zero labels.
// Functions should never be labelled with both "force_zero_labels" and
// "uninstrumented" or any of the unistrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// stores instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

static cl::list<std::string> ClCombineTaintLookupTables(
    "dfsan-combine-taint-lookup-table",
    cl::desc(
        "When dfsan-combine-offset-labels-on-gep and/or "
        "dfsan-combine-pointer-labels-on-load are false, this flag can "
        "be used to re-enable combining offset and/or pointer taint when "
        "loading specific constant global variables (i.e. lookup tables)."),
    cl::Hidden);

static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Experimental feature that inserts callbacks for data reaching a function,
// either via function arguments and loads.
// This must be true for dfsan_set_reaches_function_callback() to have effect.
static cl::opt<bool> ClReachesFunctionCallbacks(
    "dfsan-reaches-function-callbacks",
    cl::desc("Insert calls to callback functions on data reaching a function."),
    cl::Hidden, cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));

static StringRef getGlobalTypeString(const GlobalValue &G) {}

namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
struct MemoryMapParams {};

} // end anonymous namespace

// NOLINTBEGIN(readability-identifier-naming)
// aarch64 Linux
const MemoryMapParams Linux_AArch64_MemoryMapParams =;

// x86_64 Linux
const MemoryMapParams Linux_X86_64_MemoryMapParams =;
// NOLINTEND(readability-identifier-naming)

// loongarch64 Linux
const MemoryMapParams Linux_LoongArch64_MemoryMapParams =;

namespace {

class DFSanABIList {};

/// TransformedFunction is used to express the result of transforming one
/// function type into another.  This struct is immutable.  It holds metadata
/// useful for updating calls of the old function to the new type.
struct TransformedFunction {};

/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {}

class DataFlowSanitizer {};

struct DFSanFunction {};

class DFSanVisitor : public InstVisitor<DFSanVisitor> {};

bool LibAtomicFunction(const Function &F) {}

} // end anonymous namespace

DataFlowSanitizer::DataFlowSanitizer(
    const std::vector<std::string> &ABIListFiles) {}

TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {}

bool DataFlowSanitizer::isZeroShadow(Value *V) {}

bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {}

bool DataFlowSanitizer::shouldTrackOrigins() {}

Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {}

Constant *DataFlowSanitizer::getZeroShadow(Value *V) {}

static Value *expandFromPrimitiveShadowRecursive(
    Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
    Value *PrimitiveShadow, IRBuilder<> &IRB) {}

bool DFSanFunction::shouldInstrumentWithCall() {}

Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                BasicBlock::iterator Pos) {}

template <class AggregateType>
Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                              IRBuilder<> &IRB) {}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                IRBuilder<> &IRB) {}

Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                BasicBlock::iterator Pos) {}

void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
                                                     Value *Condition) {}

void DFSanFunction::addReachesFunctionCallbacksIfEnabled(IRBuilder<> &IRB,
                                                         Instruction &I,
                                                         Value *Data) {}

Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {}

Type *DataFlowSanitizer::getShadowTy(Value *V) {}

bool DataFlowSanitizer::initializeModule(Module &M) {}

bool DataFlowSanitizer::isInstrumented(const Function *F) {}

bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {}

bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {}

DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {}

void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {}

void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
                                                     Function *F) {}

Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {}

// Initialize DataFlowSanitizer runtime functions and declare them in the module
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {}

// Initializes event callback functions and declare them in the module
void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {}

bool DataFlowSanitizer::runImpl(
    Module &M, llvm::function_ref<TargetLibraryInfo &(Function &)> GetTLI) {}

Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {}

Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {}

Value *DFSanFunction::getRetvalOriginTLS() {}

Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {}

Value *DFSanFunction::getOrigin(Value *V) {}

void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {}

Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {}

Value *DFSanFunction::getShadow(Value *V) {}

void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {}

/// Compute the integer shadow offset that corresponds to a given
/// application address.
///
/// Offset = (Addr & ~AndMask) ^ XorMask
Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {}

std::pair<Value *, Value *>
DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
                                          BasicBlock::iterator Pos) {}

Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
                                           BasicBlock::iterator Pos,
                                           Value *ShadowOffset) {}

Value *DataFlowSanitizer::getShadowAddress(Value *Addr,
                                           BasicBlock::iterator Pos) {}

Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                                BasicBlock::iterator Pos) {}

// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value is with primitive type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2,
                                     BasicBlock::iterator Pos) {}

// A convenience function which folds the shadows of each of the operands
// of the provided instruction Inst, inserting the IR before Inst.  Returns
// the computed union Value.
Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {}

void DFSanVisitor::visitInstOperands(Instruction &I) {}

Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
                                     const std::vector<Value *> &Origins,
                                     BasicBlock::iterator Pos,
                                     ConstantInt *Zero) {}

Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {}

void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {}

Align DFSanFunction::getShadowAlign(Align InstAlignment) {}

Align DFSanFunction::getOriginAlign(Align InstAlignment) {}

bool DFSanFunction::isLookupTableConstant(Value *P) {}

bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                  Align InstAlignment) {}

Value *DataFlowSanitizer::loadNextOrigin(BasicBlock::iterator Pos,
                                         Align OriginAlign,
                                         Value **OriginAddr) {}

std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, BasicBlock::iterator Pos) {}

std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, BasicBlock::iterator Pos) {}

std::pair<Value *, Value *>
DFSanFunction::loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment,
                                BasicBlock::iterator Pos) {}

static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {}

Value *StripPointerGEPsAndCasts(Value *V) {}

void DFSanVisitor::visitLoadInst(LoadInst &LI) {}

Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
                                            IRBuilder<> &IRB) {}

Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {}

Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {}

void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {}

Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
                                    const Twine &Name) {}

void DFSanFunction::storeOrigin(BasicBlock::iterator Pos, Value *Addr,
                                uint64_t Size, Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {}

void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
                                             Align ShadowAlign,
                                             BasicBlock::iterator Pos) {}

void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               BasicBlock::iterator Pos) {}

static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {}

void DFSanVisitor::visitStoreInst(StoreInst &SI) {}

void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {}

void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {}

void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {}

void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {}

void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {}

void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {}

void DFSanVisitor::visitCastInst(CastInst &CI) {}

void DFSanVisitor::visitCmpInst(CmpInst &CI) {}

void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {}

void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {}

void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {}

void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {}

void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {}

void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {}

void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {}

void DFSanVisitor::visitAllocaInst(AllocaInst &I) {}

void DFSanVisitor::visitSelectInst(SelectInst &I) {}

void DFSanVisitor::visitMemSetInst(MemSetInst &I) {}

void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {}

void DFSanVisitor::visitBranchInst(BranchInst &BR) {}

void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {}

static bool isAMustTailRetVal(Value *RetVal) {}

void DFSanVisitor::visitReturnInst(ReturnInst &RI) {}

void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {}

void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {}

bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {}

Value *DFSanVisitor::makeAddAcquireOrderingTable(IRBuilder<> &IRB) {}

void DFSanVisitor::visitLibAtomicLoad(CallBase &CB) {}

Value *DFSanVisitor::makeAddReleaseOrderingTable(IRBuilder<> &IRB) {}

void DFSanVisitor::visitLibAtomicStore(CallBase &CB) {}

void DFSanVisitor::visitLibAtomicExchange(CallBase &CB) {}

void DFSanVisitor::visitLibAtomicCompareExchange(CallBase &CB) {}

void DFSanVisitor::visitCallBase(CallBase &CB) {}

void DFSanVisitor::visitPHINode(PHINode &PN) {}

PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {}