PoisonChecking.cpp | Explore in Territory

//===- PoisonChecking.cpp - -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements a transform pass which instruments IR such that poison semantics
// are made explicit.  That is, it provides a (possibly partial) executable
// semantics for every instruction w.r.t. poison as specified in the LLVM
// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
// is focused purely on the semantics of LLVM IR, not any particular source
// language.   If you're looking for something to see if your C/C++ contains
// UB, this is not it.
//
// The rewritten semantics of each instruction will include the following
// components:
//
// 1) The original instruction, unmodified.
// 2) A propagation rule which translates dynamic information about the poison
//    state of each input to whether the dynamic output of the instruction
//    produces poison.
// 3) A creation rule which validates any poison producing flags on the
//    instruction itself (e.g. checks for overflow on nsw).
// 4) A check rule which traps (to a handler function) if this instruction must
//    execute undefined behavior given the poison state of it's inputs.
//
// This is a must analysis based transform; that is, the resulting code may
// produce a false negative result (not report UB when actually exists
// according to the LangRef spec), but should never produce a false positive
// (report UB where it doesn't exist).
//
// Use cases for this pass include:
// - Understanding (and testing!) the implications of the definition of poison
//   from the LangRef.
// - Validating the output of a IR fuzzer to ensure that all programs produced
//   are well defined on the specific input used.
// - Finding/confirming poison specific miscompiles by checking the poison
//   status of an input/IR pair is the same before and after an optimization
//   transform.
// - Checking that a bugpoint reduction does not introduce UB which didn't
//   exist in the original program being reduced.
//
// The major sources of inaccuracy are currently:
// - Most validation rules not yet implemented for instructions with poison
//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
// - UB which is control dependent on a branch on poison is not yet
//   reported. Currently, only data flow dependence is modeled.
// - Poison which is propagated through memory is not modeled.  As such,
//   storing poison to memory and then reloading it will cause a false negative
//   as we consider the reloaded value to not be poisoned.
// - Poison propagation across function boundaries is not modeled.  At the
//   moment, all arguments and return values are assumed not to be poison.
// - Undef is not modeled.  In particular, the optimizer's freedom to pick
//   concrete values for undef bits so as to maximize potential for producing
//   poison is not modeled.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"

usingnamespacellvm;

#define DEBUG_TYPE …

static cl::opt<bool>
LocalCheck("poison-checking-function-local",
           cl::init(false),
           cl::desc("Check that returns are non-poison (for testing)"));


static bool isConstantFalse(Value* V) { … }

static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { … }

static void generateCreationChecksForBinOp(Instruction &I,
                                           SmallVectorImpl<Value*> &Checks) { … }

/// Given an instruction which can produce poison on non-poison inputs
/// (i.e. canCreatePoison returns true), generate runtime checks to produce
/// boolean indicators of when poison would result.
static void generateCreationChecks(Instruction &I,
                                   SmallVectorImpl<Value*> &Checks) { … }

static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { … }

static void CreateAssert(IRBuilder<> &B, Value *Cond) { … }

static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { … }

static bool rewrite(Function &F) { … }


PreservedAnalyses PoisonCheckingPass::run(Module &M,
                                          ModuleAnalysisManager &AM) { … }

PreservedAnalyses PoisonCheckingPass::run(Function &F,
                                          FunctionAnalysisManager &AM) { … }

/* Major TODO Items:
   - Control dependent poison UB
   - Strict mode - (i.e. must analyze every operand)
     - Poison through memory
     - Function ABIs
     - Full coverage of intrinsics, etc.. (ouch)

   Instructions w/Unclear Semantics:
   - shufflevector - It would seem reasonable for an out of bounds mask element
     to produce poison, but the LangRef does not state.
   - all binary ops w/vector operands - The likely interpretation would be that
     any element overflowing should produce poison for the entire result, but
     the LangRef does not state.
   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
     strange that only certian flags should be documented as producing poison.

   Cases of clear poison semantics not yet implemented:
   - Exact flags on ashr/lshr produce poison
   - NSW/NUW flags on shl produce poison
   - Inbounds flag on getelementptr produce poison
   - fptosi/fptoui (out of bounds input) produce poison
   - Scalable vector types for insertelement/extractelement
   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
 */
llvm/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp