// llvm/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp

//===------ BPFPreserveStaticOffset.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// TLDR: replaces llvm.preserve.static.offset + GEP + load / store
//           with llvm.bpf.getelementptr.and.load / store
//
// This file implements BPFPreserveStaticOffsetPass transformation.
// This transformation addresses two BPF verifier specific issues:
//
// (a) Access to the fields of some structural types is allowed only
//     using load and store instructions with static immediate offsets.
//
//     Examples of such types are `struct __sk_buff` and `struct
//     bpf_sock_ops`.  This is so because offsets of the fields of
//     these structures do not match real offsets in the running
//     kernel. During BPF program load LDX and STX instructions
//     referring to the fields of these types are rewritten so that
//     offsets match real offsets. For this rewrite to happen field
//     offsets have to be encoded as immediate operands of the
//     instructions.
//
//     See kernel/bpf/verifier.c:convert_ctx_access function in the
//     Linux kernel source tree for details.
//
// (b) Pointers to context parameters of BPF programs must not be
//     modified before access.
//
//     During BPF program verification a tag PTR_TO_CTX is tracked for
//     register values. In case if register with such tag is modified
//     BPF program is not allowed to read or write memory using this
//     register. See kernel/bpf/verifier.c:check_mem_access function
//     in the Linux kernel source tree for details.
//
// The following sequence of the IR instructions:
//
//   %x = getelementptr %ptr, %constant_offset
//   %y = load %x
//
// Is translated as a single machine instruction:
//
//   LDW %ptr, %constant_offset
//
// In order for cases (a) and (b) to work the sequence %x-%y above has
// to be preserved by the IR passes.
//
// However, several optimization passes might sink `load` instruction
// or hoist `getelementptr` instruction so that the instructions are
// no longer in sequence. Examples of such passes are:
// SimplifyCFGPass, InstCombinePass, GVNPass.
// After such modification the verifier would reject the BPF program.
//
// To avoid this issue the patterns like (load/store (getelementptr ...))
// are replaced by calls to BPF specific intrinsic functions:
// - llvm.bpf.getelementptr.and.load
// - llvm.bpf.getelementptr.and.store
//
// These calls are lowered back to (load/store (getelementptr ...))
// by BPFCheckAndAdjustIR pass right before the translation from IR to
// machine instructions.
//
// The transformation is split into the following steps:
// - When IR is generated from AST the calls to intrinsic function
//   llvm.preserve.static.offset are inserted.
// - BPFPreserveStaticOffsetPass is executed as early as possible
//   with AllowPatial set to true, this handles marked GEP chains
//   with constant offsets.
// - BPFPreserveStaticOffsetPass is executed at ScalarOptimizerLateEPCallback
//   with AllowPatial set to false, this handles marked GEP chains
//   with offsets that became constant after loop unrolling, e.g.
//   to handle the following code:
//
// struct context { int x[4]; } __attribute__((preserve_static_offset));
//
//   struct context *ctx = ...;
// #pragma clang loop unroll(full)
//   for (int i = 0; i < 4; ++i)
//     foo(ctx->x[i]);
//
// The early BPFPreserveStaticOffsetPass run is necessary to allow
// additional GVN / CSE opportunities after functions inlining.
// The relative order of optimization applied to function:
// - early stage (1)
// - ...
// - function inlining (2)
// - ...
// - loop unrolling
// - ...
// - ScalarOptimizerLateEPCallback (3)
//
// When function A is inlined into function B all optimizations for A
// are already done, while some passes remain for B. In case if
// BPFPreserveStaticOffsetPass is done at (3) but not done at (1)
// the code after (2) would contain a mix of
// (load (gep %p)) and (get.and.load %p) usages:
// - the (load (gep %p)) would come from the calling function;
// - the (get.and.load %p) would come from the callee function.
// Thus clobbering CSE / GVN passes done after inlining.

#include "BPF.h"
#include "BPFCORE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

#define DEBUG_TYPE

usingnamespacellvm;

static const unsigned GepAndLoadFirstIdxArg =;
static const unsigned GepAndStoreFirstIdxArg =;

// Returns true if I is a call to the intrinsic identified by Id.
// (Implementation elided in this excerpt.)
static bool isIntrinsicCall(Value *I, Intrinsic::ID Id) {}

// Returns true if I is a call to llvm.preserve.static.offset (the marker
// intrinsic described in the file header). (Implementation elided.)
static bool isPreserveStaticOffsetCall(Value *I) {}

// If I is a call to llvm.bpf.getelementptr.and.load, returns it as a
// CallInst; presumably returns null otherwise (implementation elided).
static CallInst *isGEPAndLoad(Value *I) {}

// If I is a call to llvm.bpf.getelementptr.and.store, returns it as a
// CallInst; presumably returns null otherwise (implementation elided).
static CallInst *isGEPAndStore(Value *I) {}

// Merges the debug locations of all instructions in Insns into a single
// DILocation for the instruction that replaces them — presumably via
// DILocation::getMergedLocation; TODO confirm (implementation elided).
template <class T = Instruction>
static DILocation *mergeDILocations(SmallVector<T *> &Insns) {}

// Builds a call to the BPF intrinsic Intrinsic in module M, using Types to
// resolve the overloaded signature and Args as the call's arguments.
// (Implementation elided in this excerpt.)
static CallInst *makeIntrinsicCall(Module *M,
                                   Intrinsic::BPFIntrinsics Intrinsic,
                                   ArrayRef<Type *> Types,
                                   ArrayRef<Value *> Args) {}

// Attaches an elementtype(Type) parameter attribute to argument ArgNo of
// Call. (Implementation elided in this excerpt.)
static void setParamElementType(CallInst *Call, unsigned ArgNo, Type *Type) {}

// Marks argument ArgNo of Call with the readnone parameter attribute.
// (Implementation elided in this excerpt.)
static void setParamReadNone(CallInst *Call, unsigned ArgNo) {}

// Marks argument ArgNo of Call with the readonly parameter attribute.
// (Implementation elided in this excerpt.)
static void setParamReadOnly(CallInst *Call, unsigned ArgNo) {}

// Marks argument ArgNo of Call with the writeonly parameter attribute.
// (Implementation elided in this excerpt.)
static void setParamWriteOnly(CallInst *Call, unsigned ArgNo) {}

namespace {
// Aggregated information about a chain of GEP instructions that is being
// folded into a single GEP / intrinsic call (see foldGEPChainAsStructAccess
// and foldGEPChainAsU8Access below). Members elided in this excerpt.
struct GEPChainInfo {};
} // Anonymous namespace

// Appends to Args the intrinsic-call arguments common to
// llvm.bpf.getelementptr.and.load and ...and.store, derived from the folded
// GEP chain info and the load/store instruction Insn.
// T is LoadInst or StoreInst. (Implementation elided in this excerpt.)
template <class T = std::disjunction<LoadInst, StoreInst>>
static void fillCommonArgs(LLVMContext &C, SmallVector<Value *> &Args,
                           GEPChainInfo &GEP, T *Insn) {}

// Builds a llvm.bpf.getelementptr.and.load call replacing the pattern
// (load (gep ...)) described by GEP and Load (per the file-level comment).
// (Implementation elided in this excerpt.)
static Instruction *makeGEPAndLoad(Module *M, GEPChainInfo &GEP,
                                   LoadInst *Load) {}

// Builds a llvm.bpf.getelementptr.and.store call replacing the pattern
// (store (gep ...)) described by GEP and Store (per the file-level comment).
// (Implementation elided in this excerpt.)
static Instruction *makeGEPAndStore(Module *M, GEPChainInfo &GEP,
                                    StoreInst *Store) {}

// Returns operand ArgNo of Call interpreted as an unsigned constant —
// presumably asserts/reports if it is not a ConstantInt; TODO confirm.
static unsigned getOperandAsUnsigned(CallInst *Call, unsigned ArgNo) {}

// Rebuilds a GetElementPtrInst from the index operands of a
// gep.and.load/store intrinsic call. Delta presumably adjusts the position
// of the first index operand (load vs store argument lists differ by one).
// (Implementation elided in this excerpt.)
static GetElementPtrInst *reconstructGEP(CallInst *Call, int Delta) {}

// Copies the attributes shared by load and store (e.g. volatility,
// alignment, debug location — TODO confirm, body elided) from the intrinsic
// call Call onto the reconstructed GEP and load/store instruction Insn.
// T is LoadInst or StoreInst.
template <class T = std::disjunction<LoadInst, StoreInst>>
static void reconstructCommon(CallInst *Call, GetElementPtrInst *GEP, T *Insn,
                              int Delta) {}

// Lowers a llvm.bpf.getelementptr.and.load call back to the
// (getelementptr, load) pair — the inverse transformation, used by
// BPFCheckAndAdjustIR per the file header. (Implementation elided.)
std::pair<GetElementPtrInst *, LoadInst *>
BPFPreserveStaticOffsetPass::reconstructLoad(CallInst *Call) {}

// Lowers a llvm.bpf.getelementptr.and.store call back to the
// (getelementptr, store) pair — the inverse transformation, used by
// BPFCheckAndAdjustIR per the file header. (Implementation elided.)
std::pair<GetElementPtrInst *, StoreInst *>
BPFPreserveStaticOffsetPass::reconstructStore(CallInst *Call) {}

// Returns true if V is a constant zero (per name; implementation elided).
static bool isZero(Value *V) {}

// Given a chain of GEP instructions collect information necessary to
// merge this chain as a single GEP instruction of form:
//   getelementptr %<type>, ptr %p, i32 0, <field_idx1>, <field_idx2>, ...
// Fills Info and presumably returns true on success, false if the chain
// does not form such a struct access. (Implementation elided.)
static bool foldGEPChainAsStructAccess(SmallVector<GetElementPtrInst *> &GEPs,
                                       GEPChainInfo &Info) {}

// Given a chain of GEP instructions collect information necessary to
// merge this chain as a single GEP instruction of form:
//   getelementptr i8, ptr %p, i64 %offset
// Fallback when foldGEPChainAsStructAccess does not apply; fills Info and
// presumably returns true on success. (Implementation elided.)
static bool foldGEPChainAsU8Access(SmallVector<GetElementPtrInst *> &GEPs,
                                   GEPChainInfo &Info) {}

// Emits a diagnostic for a marked GEP chain that could not be folded to a
// static offset (shown only when AllowPartial is false, per the
// rewriteAccessChain parameter description below). (Implementation elided.)
static void reportNonStaticGEPChain(Instruction *Insn) {}

// Returns true if every index of every GEP in GEPs is zero (per name;
// implementation elided in this excerpt).
static bool allZeroIndices(SmallVector<GetElementPtrInst *> &GEPs) {}

// Attempts to replace InsnToReplace (a load or store matching the template
// LoadOrStoreTemplate) and its folded GEP chain with a single
// llvm.bpf.getelementptr.and.load/store call; presumably returns true on
// success. (Implementation elided in this excerpt.)
static bool tryToReplaceWithGEPBuiltin(Instruction *LoadOrStoreTemplate,
                                       SmallVector<GetElementPtrInst *> &GEPs,
                                       Instruction *InsnToReplace) {}

// Check if U->getPointerOperand() == I
// (i.e. I is used by U as the address, not as a stored value; body elided).
static bool isPointerOperand(Value *I, User *U) {}

// Returns true if U is a call that may later be inlined (per name; the
// exact criteria are not visible in this excerpt — TODO confirm).
static bool isInlineableCall(User *U) {}

// Forward declaration: rewriteAccessChain and rewriteUses below are
// presumably mutually recursive (rewriteUses is defined first).
// Note the "AllowPatial" spelling (missing 'r') is the actual parameter name.
static void rewriteAccessChain(Instruction *Insn,
                               SmallVector<GetElementPtrInst *> &GEPs,
                               SmallVector<Instruction *> &Visited,
                               bool AllowPatial, bool &StillUsed);

// Applies rewriteAccessChain to the users of Insn — presumably the
// recursive-descent step of the DFS documented before rewriteAccessChain
// below. Parameters mirror rewriteAccessChain. (Implementation elided.)
static void rewriteUses(Instruction *Insn,
                        SmallVector<GetElementPtrInst *> &GEPs,
                        SmallVector<Instruction *> &Visited, bool AllowPatial,
                        bool &StillUsed) {}

// A DFS traversal of GEP chain trees starting from Root.
//
// Recursion descends through GEP instructions and
// llvm.preserve.static.offset calls. Recursion stops at any other
// instruction. If load or store instruction is reached it is replaced
// by a call to `llvm.bpf.getelementptr.and.load` or
// `llvm.bpf.getelementptr.and.store` intrinsic.
// If `llvm.bpf.getelementptr.and.load/store` is reached the accumulated
// GEPs are merged into the intrinsic call.
// If nested calls to `llvm.preserve.static.offset` are encountered these
// calls are marked for deletion.
//
// Parameters description:
// - Insn - current position in the tree
// - GEPs - GEP instructions for the current branch
// - Visited - a list of visited instructions in DFS order,
//   order is important for unused instruction deletion.
// - AllowPartial - when true GEP chains that can't be folded are
//   not reported, otherwise diagnostic message is show for such chains.
// - StillUsed - set to true if one of the GEP chains could not be
//   folded, makes sense when AllowPartial is false, means that root
//   preserve.static.offset call is still in use and should remain
//   until the next run of this pass.
// (Implementation elided in this excerpt. Note the parameter is spelled
// "AllowPatial" in code while the comment says "AllowPartial".)
static void rewriteAccessChain(Instruction *Insn,
                               SmallVector<GetElementPtrInst *> &GEPs,
                               SmallVector<Instruction *> &Visited,
                               bool AllowPatial, bool &StillUsed) {}

// Deletes a llvm.preserve.static.offset marker call — presumably replacing
// its uses with its pointer argument first; TODO confirm (body elided).
static void removeMarkerCall(Instruction *Marker) {}

// Per-marker driver: rewrites the access chains rooted at one
// llvm.preserve.static.offset call. RemovedMarkers presumably tracks
// markers already deleted so they are not processed twice.
// (Implementation elided in this excerpt.)
static bool rewriteAccessChain(Instruction *Marker, bool AllowPatial,
                               SmallPtrSetImpl<Instruction *> &RemovedMarkers) {}

// Collects all llvm.preserve.static.offset calls in function F.
// (Implementation elided in this excerpt.)
static std::vector<Instruction *>
collectPreserveStaticOffsetCalls(Function &F) {}

// Returns true if V is a llvm.preserve.array.access.index call (per name;
// body elided). NOTE(review): not marked `static`, unlike sibling helpers —
// consider giving it internal linkage.
bool isPreserveArrayIndex(Value *V) {}

// Returns true if V is a llvm.preserve.struct.access.index call (per name;
// body elided). NOTE(review): not marked `static`, unlike sibling helpers.
bool isPreserveStructIndex(Value *V) {}

// Returns true if V is a llvm.preserve.union.access.index call (per name;
// body elided). NOTE(review): not marked `static`, unlike sibling helpers.
bool isPreserveUnionIndex(Value *V) {}

// Removes preserve.*.access.index ("PAI") calls reachable from Marker —
// presumably because CO-RE relocations do not apply under
// preserve_static_offset; TODO confirm (body elided).
static void removePAICalls(Instruction *Marker) {}

// Look for sequences:
// - llvm.preserve.static.offset -> getelementptr... -> load
// - llvm.preserve.static.offset -> getelementptr... -> store
// And replace those with calls to intrinsics:
// - llvm.bpf.getelementptr.and.load
// - llvm.bpf.getelementptr.and.store
// Presumably returns true if the IR was changed. AllowPartial is described
// in the file header (two-phase pass scheduling). (Implementation elided.)
static bool rewriteFunction(Function &F, bool AllowPartial) {}

// New-pass-manager entry point. Presumably delegates to rewriteFunction
// with this pass instance's AllowPartial setting and returns
// PreservedAnalyses according to whether the IR changed. (Body elided.)
PreservedAnalyses
llvm::BPFPreserveStaticOffsetPass::run(Function &F,
                                       FunctionAnalysisManager &AM) {}