NumericalStabilitySanitizer.cpp | Explore in Territory

//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the instrumentation pass for the numerical sanitizer.
// Conceptually the pass injects shadow computations using higher precision
// types and inserts consistency checks. For details see the paper
// https://arxiv.org/abs/2102.12782.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include "llvm/Transforms/Utils/Instrumentation.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cstdint>

usingnamespacellvm;

#define DEBUG_TYPE …

STATISTIC(NumInstrumentedFTLoads,
          "Number of instrumented floating-point loads");

STATISTIC(NumInstrumentedFTCalls,
          "Number of instrumented floating-point calls");
STATISTIC(NumInstrumentedFTRets,
          "Number of instrumented floating-point returns");
STATISTIC(NumInstrumentedFTStores,
          "Number of instrumented floating-point stores");
STATISTIC(NumInstrumentedNonFTStores,
          "Number of instrumented non floating-point stores");
STATISTIC(
    NumInstrumentedNonFTMemcpyStores,
    "Number of instrumented non floating-point stores with memcpy semantics");
STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");

// Using smaller shadow types types can help improve speed. For example, `dlq`
// is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
// `dqq`.
static cl::opt<std::string> ClShadowMapping(
    "nsan-shadow-type-mapping", cl::init("dqq"),
    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
             "ppc_fp128 (extended double) respectively. The default is to "
             "shadow `float` as `double`, and `double` and `x86_fp80` as "
             "`fp128`"),
    cl::Hidden);

static cl::opt<bool>
    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
                     cl::desc("Instrument floating-point comparisons"),
                     cl::Hidden);

static cl::opt<std::string> ClCheckFunctionsFilter(
    "check-functions-filter",
    cl::desc("Only emit checks for arguments of functions "
             "whose names match the given regular expression"),
    cl::value_desc("regex"));

static cl::opt<bool> ClTruncateFCmpEq(
    "nsan-truncate-fcmp-eq", cl::init(true),
    cl::desc(
        "This flag controls the behaviour of fcmp equality comparisons."
        "For equality comparisons such as `x == 0.0f`, we can perform the "
        "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
        " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
        "catch the case when `x_shadow` is accurate enough (and therefore "
        "close enough to zero) so that `trunc(x_shadow)` is zero even though "
        "both `x` and `x_shadow` are not"),
    cl::Hidden);

// When there is external, uninstrumented code writing to memory, the shadow
// memory can get out of sync with the application memory. Enabling this flag
// emits consistency checks for loads to catch this situation.
// When everything is instrumented, this is not strictly necessary because any
// load should have a corresponding store, but can help debug cases when the
// framework did a bad job at tracking shadow memory modifications by failing on
// load rather than store.
// TODO: provide a way to resume computations from the FT value when the load
// is inconsistent. This ensures that further computations are not polluted.
static cl::opt<bool> ClCheckLoads("nsan-check-loads",
                                  cl::desc("Check floating-point load"),
                                  cl::Hidden);

static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
                                   cl::desc("Check floating-point stores"),
                                   cl::Hidden);

static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
                                cl::desc("Check floating-point return values"),
                                cl::Hidden);

// LLVM may store constant floats as bitcasted ints.
// It's not really necessary to shadow such stores,
// if the shadow value is unknown the framework will re-extend it on load
// anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
// impossible to determine the floating-point type based on the size.
// However, for debugging purposes it can be useful to model such stores.
static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
    "nsan-propagate-non-ft-const-stores-as-ft",
    cl::desc(
        "Propagate non floating-point const stores as floating point values."
        "For debugging purposes only"),
    cl::Hidden);

constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
constexpr StringLiteral kNsanInitName("__nsan_init");

// The following values must be kept in sync with the runtime.
constexpr int kShadowScale = …;
constexpr int kMaxVectorWidth = …;
constexpr int kMaxNumArgs = …;
constexpr int kMaxShadowTypeSizeBytes = …; // fp128

namespace {

// Defines the characteristics (type id, type, and floating-point semantics)
// attached for all possible shadow types.
class ShadowTypeConfig { … };

template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig { … };

// `double` (`d`) shadow type.
class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> { … };

// `x86_fp80` (`l`) shadow type: X86 long double.
class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> { … };

// `fp128` (`q`) shadow type.
class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> { … };

// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> { … };

// Creates a ShadowTypeConfig given its type id.
std::unique_ptr<ShadowTypeConfig>
ShadowTypeConfig::fromNsanTypeId(const char TypeId) { … }

// An enum corresponding to shadow value types. Used as indices in arrays, so
// not an `enum class`.
enum FTValueType { … };

// If `FT` corresponds to a primitive FTValueType, return it.
static std::optional<FTValueType> ftValueTypeFromType(Type *FT) { … }

// Returns the LLVM type for an FTValueType.
static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) { … }

// Returns the type name for an FTValueType.
static const char *typeNameFromFTValueType(FTValueType VT) { … }

// A specific mapping configuration of application type to shadow type for nsan
// (see -nsan-shadow-mapping flag).
class MappingConfig { … };

// The memory extents of a type specifies how many elements of a given
// FTValueType needs to be stored when storing this type.
struct MemoryExtents { … };

static MemoryExtents getMemoryExtentsOrDie(Type *FT) { … }

// The location of a check. Passed as parameters to runtime checking functions.
class CheckLoc { … };

// A map of LLVM IR values to shadow LLVM IR values.
class ValueToShadowMap { … };

class NsanMemOpFn { … };

NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
                         StringRef Fallback, size_t NumArgs) { … }

FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const { … }

FunctionCallee NsanMemOpFn::getFallback() const { … }

/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
/// API function declarations into the module if they don't exist already.
/// Instantiating ensures the __nsan_init function is in the list of global
/// constructors for the module.
class NumericalStabilitySanitizer { … };
} // end anonymous namespace

PreservedAnalyses
NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { … }

static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) { … }

NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
    : … { … }

// Returns true if the given LLVM Value points to constant data (typically, a
// global variable reference).
bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) { … }

// This instruments the function entry to create shadow arguments.
// Pseudocode:
//   if (this_fn_ptr == __nsan_shadow_args_tag) {
//     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
//     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
//     ...
//     __nsan_shadow_args_tag = 0;
//   } else {
//     s(arg0) = fext(arg0);
//     s(arg1) = fext(arg1);
//     ...
//   }
void NumericalStabilitySanitizer::createShadowArguments(
    Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) { … }

// Returns true if the instrumentation should emit code to check arguments
// before a function call.
static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
                            const std::optional<Regex> &CheckFunctionsFilter) { … }

// Populates the shadow call stack (which contains shadow values for every
// floating-point parameter to the function).
void NumericalStabilitySanitizer::populateShadowStack(
    CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) { … }

// Internal part of emitCheck(). Returns a value that indicates whether
// computation should continue with the shadow or resume by re-fextending the
// value.
enum class ContinuationType { … };

Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
                                                      IRBuilder<> &Builder,
                                                      CheckLoc Loc) { … }

// Inserts a runtime check of V against its shadow value ShadowV.
// We check values whenever they escape: on return, call, stores, and
// insertvalue.
// Returns the shadow value that should be used to continue the computations,
// depending on the answer from the runtime.
// TODO: Should we check on select ? phi ?
Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
                                              IRBuilder<> &Builder,
                                              CheckLoc Loc) { … }

// Inserts a check that fcmp on shadow values are consistent with that on base
// values.
void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
                                                const ValueToShadowMap &Map) { … }

// Creates a shadow phi value for any phi that defines a value of FT type.
PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
    PHINode &Phi, const TargetLibraryInfo &TLI) { … }

Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
                                               Type *ExtendedVT) { … }

Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
                                                Type *VT, Type *ExtendedVT,
                                                const ValueToShadowMap &Map,
                                                IRBuilder<> &Builder) { … }

Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
                                              Type *ExtendedVT,
                                              const ValueToShadowMap &Map,
                                              IRBuilder<> &Builder) { … }

namespace {
// TODO: This should be tablegen-ed.
struct KnownIntrinsic { … };
} // namespace

static FunctionType *makeDoubleDouble(LLVMContext &C) { … }

static FunctionType *makeX86FP80X86FP80(LLVMContext &C) { … }

static FunctionType *makeDoubleDoubleI32(LLVMContext &C) { … }

static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) { … }

static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) { … }

static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) { … }

static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) { … }

static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) { … }

const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = …;

const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = …;

const char *KnownIntrinsic::get(LibFunc LFunc) { … }

const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) { … }

// Returns the name of the LLVM intrinsic corresponding to the given function.
static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
                                           const TargetLibraryInfo &TLI) { … }

// Try to handle a known function call.
Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
    CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
    const ValueToShadowMap &Map, IRBuilder<> &Builder) { … }

// Handle a CallBase, i.e. a function call, an inline asm sequence, or an
// invoke.
Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
                                                   Type *ExtendedVT,
                                                   const TargetLibraryInfo &TLI,
                                                   const ValueToShadowMap &Map,
                                                   IRBuilder<> &Builder) { … }

// Creates a shadow value for the given FT value. At that point all operands are
// guaranteed to be available.
Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
    Instruction &Inst, const TargetLibraryInfo &TLI,
    const ValueToShadowMap &Map) { … }

// Creates a shadow value for an instruction that defines a value of FT type.
// FT operands that do not already have shadow values are created recursively.
// The DFS is guaranteed to not loop as phis and arguments already have
// shadows.
void NumericalStabilitySanitizer::maybeCreateShadowValue(
    Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) { … }

// A floating-point store needs its value and type written to shadow memory.
void NumericalStabilitySanitizer::propagateFTStore(
    StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) { … }

// A non-ft store needs to invalidate shadow memory. Exceptions are:
//   - memory transfers of floating-point data through other pointer types (llvm
//     optimization passes transform `*(float*)a = *(float*)b` into
//     `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
//   - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
//     ints. Note that this is not really necessary because if the value is
//     unknown the framework will re-extend it on load anyway. It just felt
//     easier to debug tests with vectors of FTs.
void NumericalStabilitySanitizer::propagateNonFTStore(
    StoreInst &Store, Type *VT, const ValueToShadowMap &Map) { … }

void NumericalStabilitySanitizer::propagateShadowValues(
    Instruction &Inst, const TargetLibraryInfo &TLI,
    const ValueToShadowMap &Map) { … }

// Moves fast math flags from the function to individual instructions, and
// removes the attribute from the function.
// TODO: Make this controllable with a flag.
static void moveFastMathFlags(Function &F,
                              std::vector<Instruction *> &Instructions) { … }

bool NumericalStabilitySanitizer::sanitizeFunction(
    Function &F, const TargetLibraryInfo &TLI) { … }

static uint64_t GetMemOpSize(Value *V) { … }

// Instrument the memory intrinsics so that they properly modify the shadow
// memory.
bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { … }

void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) { … }
llvm/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp