llvm/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp

//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file lowers exception-related instructions and setjmp/longjmp function
/// calls to use Emscripten's library functions. The pass uses JavaScript's try
/// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in
/// case of Wasm SjLj.
///
/// * Emscripten exception handling
/// This pass lowers invokes and landingpads into library functions in JS glue
/// code. Invokes are lowered into function wrappers called invoke wrappers that
/// exist in JS side, which wraps the original function call with JS try-catch.
/// If an exception occurred, cxa_throw() function in JS side sets some
/// variables (see below) so we can check whether an exception occurred from
/// wasm code and handle it appropriately.
///
/// * Emscripten setjmp-longjmp handling
/// This pass lowers setjmp to a reasonably-performant approach for emscripten.
/// The idea is that each block with a setjmp is broken up into two parts: the
/// part containing setjmp and the part right after the setjmp. The latter part
/// is either reached from the setjmp, or later from a longjmp. To handle the
/// longjmp, all calls that might longjmp are also called using invoke wrappers
/// and thus JS / try-catch. JS longjmp() function also sets some variables so
/// we can check whether a longjmp occurred from wasm code. Each block with a
/// function call that might longjmp is also split up after the longjmp call.
/// After the longjmp call, we check whether a longjmp occurred, and if it did,
/// which setjmp it corresponds to, and jump to the right post-setjmp block.
/// We assume setjmp-longjmp handling always runs after EH handling, which means
/// we don't expect any exception-related instructions when SjLj runs.
/// FIXME Currently this scheme does not support indirect call of setjmp,
/// because of the limitation of the scheme itself. fastcomp does not support it
/// either.
///
/// In detail, this pass does the following things:
///
/// 1) Assumes the existence of global variables: __THREW__, __threwValue
///    __THREW__ and __threwValue are defined in compiler-rt in Emscripten.
///    These variables are used for both exceptions and setjmp/longjmps.
///    __THREW__ indicates whether an exception or a longjmp occurred or not. 0
///    means nothing occurred, 1 means an exception occurred, and other numbers
///    mean a longjmp occurred. In the case of longjmp, __THREW__ variable
///    indicates the corresponding setjmp buffer the longjmp corresponds to.
///    __threwValue is 0 for exceptions, and the argument to longjmp in case of
///    longjmp.
///
/// * Emscripten exception handling
///
/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
///    at link time. setThrew exists in Emscripten's compiler-rt:
///
///    void setThrew(uintptr_t threw, int value) {
///      if (__THREW__ == 0) {
///        __THREW__ = threw;
///        __threwValue = value;
///      }
///    }
///
///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///    In exception handling, getTempRet0 indicates the type of an exception
///    caught, and in setjmp/longjmp, it means the second argument to longjmp
///    function.
///
/// 3) Lower
///      invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
///    into
///      __THREW__ = 0;
///      call @__invoke_SIG(func, arg1, arg2)
///      %__THREW__.val = __THREW__;
///      __THREW__ = 0;
///      if (%__THREW__.val == 1)
///        goto %lpad
///      else
///        goto %invoke.cont
///    SIG is a mangled string generated based on the LLVM IR-level function
///    signature. After LLVM IR types are lowered to the target wasm types,
///    the names for these wrappers will change based on wasm types as well,
///    as in invoke_vi (function takes an int and returns void). The bodies of
///    these wrappers will be generated in JS glue code, and inside those
///    wrappers we use JS try-catch to generate actual exception effects. It
///    also calls the original callee function. An example wrapper in JS code
///    would look like this:
///      function invoke_vi(index,a1) {
///        try {
///          Module["dynCall_vi"](index,a1); // This calls original callee
///        } catch(e) {
///          if (typeof e !== 'number' && e !== 'longjmp') throw e;
///          _setThrew(1, 0); // setThrew is called here
///        }
///      }
///    If an exception is thrown, __THREW__ will be set to true in a wrapper,
///    so we can jump to the right BB based on this value.
///
/// 4) Lower
///      %val = landingpad catch c1 catch c2 catch c3 ...
///      ... use %val ...
///    into
///      %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
///      %val = {%fmc, getTempRet0()}
///      ... use %val ...
///    Here N is a number calculated based on the number of clauses.
///    setTempRet0 is called from __cxa_find_matching_catch() in JS glue code.
///
/// 5) Lower
///      resume {%a, %b}
///    into
///      call @__resumeException(%a)
///    where __resumeException() is a function in JS glue code.
///
/// 6) Lower
///      call @llvm.eh.typeid.for(type) (intrinsic)
///    into
///      call @llvm_eh_typeid_for(type)
///    llvm_eh_typeid_for function will be generated in JS glue code.
///
/// * Emscripten setjmp / longjmp handling
///
/// If there are calls to longjmp()
///
/// 1) Lower
///      longjmp(env, val)
///    into
///      emscripten_longjmp(env, val)
///
/// If there are calls to setjmp()
///
/// 2) In the function entry that calls setjmp, initialize
///    functionInvocationId as follows:
///
///    functionInvocationId = alloca(4)
///
///    Note: the alloca size is not important as this pointer is
///    merely used for pointer comparisons.
///
/// 3) Lower
///      setjmp(env)
///    into
///      __wasm_setjmp(env, label, functionInvocationId)
///
///    __wasm_setjmp records the necessary info (the label and
///    functionInvocationId) to the "env".
///    A BB with setjmp is split into two after setjmp call in order to
///    make the post-setjmp BB the possible destination of longjmp BB.
///
/// 4) Lower every call that might longjmp into
///      __THREW__ = 0;
///      call @__invoke_SIG(func, arg1, arg2)
///      %__THREW__.val = __THREW__;
///      __THREW__ = 0;
///      %__threwValue.val = __threwValue;
///      if (%__THREW__.val != 0 & %__threwValue.val != 0) {
///        %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
///        if (%label == 0)
///          emscripten_longjmp(%__THREW__.val, %__threwValue.val);
///        setTempRet0(%__threwValue.val);
///      } else {
///        %label = -1;
///      }
///      longjmp_result = getTempRet0();
///      switch %label {
///        label 1: goto post-setjmp BB 1
///        label 2: goto post-setjmp BB 2
///        ...
///        default: goto split next BB
///      }
///
///    __wasm_setjmp_test examines the jmp buf to see if it was for a matching
///    setjmp call. After calling an invoke wrapper, if a longjmp occurred,
///    __THREW__ will be the address of matching jmp_buf buffer and
///    __threwValue be the second argument to longjmp.
///    __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp
///    callsite. Label 0 means this longjmp buffer does not correspond to one
///    of the setjmp callsites in this function, so in this case we just chain
///    the longjmp to the caller. Label -1 means no longjmp occurred.
///    Otherwise we jump to the right post-setjmp BB based on the label.
///
/// * Wasm setjmp / longjmp handling
/// This mode still uses some Emscripten library functions but not JavaScript's
/// try-catch mechanism. It instead uses Wasm exception handling intrinsics,
/// which will be lowered to exception handling instructions.
///
/// If there are calls to longjmp()
///
/// 1) Lower
///      longjmp(env, val)
///    into
///      __wasm_longjmp(env, val)
///
/// If there are calls to setjmp()
///
/// 2) and 3): The same as 2) and 3) in Emscripten SjLj.
/// (functionInvocationId initialization + setjmp callsite transformation)
///
/// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value
/// thrown by __wasm_longjmp function. In the runtime library, we have an
/// equivalent of the following struct:
///
/// struct __WasmLongjmpArgs {
///   void *env;
///   int val;
/// };
///
/// The thrown value here is a pointer to the struct. We use this struct to
/// transfer two values by throwing a single value. Wasm throw and catch
/// instructions are capable of throwing and catching multiple values, but
/// it also requires multivalue support that is currently not very reliable.
/// TODO Switch to throwing and catching two values without using the struct
///
/// All longjmpable function calls will be converted to an invoke that will
/// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we
/// test the thrown values using __wasm_setjmp_test function as we do for
/// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to
/// transform every longjmpable callsite into a sequence of code including
/// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one
/// place, in this catchpad.
///
/// After calling __wasm_setjmp_test(), if the longjmp does not
/// correspond to one of the setjmps within the current function, it rethrows
/// the longjmp by calling __wasm_longjmp(). If it corresponds to one of
/// setjmps in the function, we jump to the beginning of the function, which
/// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this
/// switch is added for every longjmpable callsite; in Wasm SjLj we do this
/// only once at the top of the function. (after functionInvocationId
/// initialization)
///
/// The below is the pseudocode for what we have described
///
/// entry:
///   Initialize functionInvocationId
///
/// setjmp.dispatch:
///    switch %label {
///      label 1: goto post-setjmp BB 1
///      label 2: goto post-setjmp BB 2
///      ...
///      default: goto split next BB
///    }
/// ...
///
/// bb:
///   invoke void @foo() ;; foo is a longjmpable function
///     to label %next unwind label %catch.dispatch.longjmp
/// ...
///
/// catch.dispatch.longjmp:
///   %0 = catchswitch within none [label %catch.longjmp] unwind to caller
///
/// catch.longjmp:
///   %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs
///   %env = load 'env' field from __WasmLongjmpArgs
///   %val = load 'val' field from __WasmLongjmpArgs
///   %label = __wasm_setjmp_test(%env, functionInvocationId);
///   if (%label == 0)
///     __wasm_longjmp(%env, %val)
///   catchret to %setjmp.dispatch
///
///===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include <set>

usingnamespacellvm;

// NOTE(review): DEBUG_TYPE is conventionally defined as a string literal naming
// this pass (used by LLVM_DEBUG / -debug-only). No value is visible here, so
// the macro expands to nothing — confirm the intended name.
#define DEBUG_TYPE

// Command-line option "emscripten-cxx-exceptions-allowed": a list of function
// names, accepted comma-separated (cl::CommaSeparated), in which
// Emscripten-style exception handling is enabled.
static cl::list<std::string>
    EHAllowlist("emscripten-cxx-exceptions-allowed",
                cl::desc("The list of function names in which Emscripten-style "
                         "exception handling is enabled (see emscripten "
                         "EMSCRIPTEN_CATCHING_ALLOWED options)"),
                cl::CommaSeparated);

namespace {
// Module pass for the Emscripten EH and SjLj lowering described in the file
// header. NOTE(review): the member list is not visible in this view; the
// out-of-line definitions later in the file (getFindMatchingCatch, wrapInvoke,
// runOnModule, ...) imply matching declarations here — confirm against the
// full class.
class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {};
} // End anonymous namespace

// Pass identification. LLVM identifies a legacy pass by the address of this
// member, so the initial value is irrelevant; 0 is the convention.
char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
// Registers the pass with the legacy pass manager.
// NOTE(review): INITIALIZE_PASS normally takes the pass class, its command-line
// argument, a description, and the CFG-only / analysis flags; the argument
// list is empty in this view — confirm against the original registration.
INITIALIZE_PASS()

// Factory for this pass, defined in the llvm namespace. Presumably returns a
// newly created WebAssemblyLowerEmscriptenEHSjLj.
// NOTE(review): the body is elided in this view — confirm.
ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() {}

// Presumably reports whether calling V may throw, i.e. whether the call needs
// the invoke-wrapper treatment described in the file header.
// NOTE(review): the body is elided in this view, so the exact criteria cannot
// be stated here — confirm.
static bool canThrow(const Value *V) {}

// Gets a thread-local global variable named Name with value type Ty from M.
// If it doesn't exist, declares it, which generates an import on the
// assumption that the variable will exist at link time.
// NOTE(review): how TM is consulted is not visible in this view — confirm.
static GlobalVariable *getGlobalVariable(Module &M, Type *Ty,
                                         WebAssemblyTargetMachine &TM,
                                         const char *Name) {}

// Simple function name mangler.
// This function simply takes LLVM's string representation of the parameter
// types of FTy and concatenates them with '_'. The result contains
// non-alphanumeric characters, but llc is ok with that, and these names need
// to be postprocessed after the lowering phase anyway.
static std::string getSignature(FunctionType *FTy) {}

// Presumably gets or declares in M a function named Name with type Ty that
// refers to one of the Emscripten library routines mentioned in the file
// header.
// NOTE(review): the body is elided in this view; linkage and attributes of the
// returned function cannot be stated here — confirm.
static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name,
                                       Module *M) {}

// Returns the integer type matching the pointer width of the target
// architecture's address space for module M: i32 for wasm32 and i64 for
// wasm64.
static Type *getAddrIntType(Module *M) {}

// Returns an integer pointer type for the target architecture's address space
// for module M: i32* for wasm32 and i64* for wasm64. With opaque pointers this
// is just a ptr in address space zero.
static Type *getAddrPtrType(Module *M) {}

// Returns the integer constant C, typed with the integer type for the target's
// address space: (i32 C) for wasm32 and (i64 C) for wasm64.
static Value *getAddrSizeInt(Module *M, uint64_t C) {}

// Returns the __cxa_find_matching_catch_N function, where N = NumClauses + 2.
// The extra two account for the personality function and the cleanup bit that
// a landingpad instruction carries in addition to its clauses;
// __cxa_find_matching_catch_N functions are named after the number of
// arguments in the original landingpad instruction.
Function *
WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
                                                       unsigned NumClauses) {}

// Generates the invoke wrapper sequence, with preamble and postamble, around
// the call CI.
// Preamble:
// __THREW__ = 0;
// Postamble:
// %__THREW__.val = __THREW__; __THREW__ = 0;
// Returns %__THREW__.val, which indicates whether an exception was thrown (or
// whether a longjmp occurred), for future use.
Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {}

// Gets the invoke wrapper matching the callee signature of CI. The file header
// names these wrappers __invoke_SIG, where SIG is mangled from the IR-level
// function signature.
Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {}

// Presumably reports whether a call to Callee might longjmp and therefore has
// to be routed through an invoke wrapper (see "Emscripten setjmp-longjmp
// handling" in the file header).
// NOTE(review): the body is elided in this view; which callees are treated as
// safe cannot be stated here — confirm.
static bool canLongjmp(const Value *Callee) {}

// Presumably reports whether Callee is one of Emscripten's EM_ASM helper
// functions.
// NOTE(review): inferred from the name only; the body is elided in this view —
// confirm which callee names are matched.
static bool isEmAsmCall(const Value *Callee) {}

// Generates the __wasm_setjmp_test function call sequence with preamble and
// postamble. The code this generates is equivalent to the following
// JavaScript code:
// %__threwValue.val = __threwValue;
// if (%__THREW__.val != 0 & %__threwValue.val != 0) {
//   %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId);
//   if (%label == 0)
//     emscripten_longjmp(%__THREW__.val, %__threwValue.val);
//   setTempRet0(%__threwValue.val);
// } else {
//   %label = -1;
// }
// %longjmp_result = getTempRet0();
//
// As output parameters, returns %label, %longjmp_result, and the BB the last
// instruction (%longjmp_result = ...) is in.
// NOTE(review): these presumably map to Label, LongjmpResult and EndBB
// respectively; CallEmLongjmpBB and the two PHI parameters are also passed by
// reference, but how they are filled in is not visible in this view — confirm.
void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
    BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId,
    Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB,
    PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI,
    BasicBlock *&EndBB) {}

// Presumably restores SSA form in F after the SjLj lowering has split blocks
// and added new control-flow edges.
// NOTE(review): inferred from the name and the SSAUpdater includes; the body
// is elided in this view — confirm.
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {}

// Replaces uses of longjmp (LongjmpF) with a new longjmp function (NewF) from
// the Emscripten library.
// In Emscripten SjLj, the new function is
//   void emscripten_longjmp(uintptr_t, i32)
// In Wasm SjLj, the new function is
//   void __wasm_longjmp(i8*, i32)
// Because the original libc longjmp function takes (jmp_buf*, i32), a
// ptrtoint/bitcast instruction is needed here to make the types match.
// jmp_buf* will eventually be lowered to i32/i64 in the wasm backend.
void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF,
                                                          Function *NewF) {}

// Presumably reports whether F contains at least one call that might longjmp.
// NOTE(review): inferred from the name; the body is elided in this view —
// confirm.
static bool containsLongjmpableCalls(const Function *F) {}

// When a function contains a setjmp call but no other calls that can longjmp,
// the setjmp transformation is not applied to that setjmp. The setjmp calls in
// F still have to be converted into "i32 0" so they don't cause link-time
// errors; this is valid because setjmp always returns 0 when called directly.
static void nullifySetjmp(Function *F) {}

// Pass entry point for module M. Presumably drives the EH and SjLj lowering
// described in the file header and returns whether M was modified.
// NOTE(review): the body is elided in this view — confirm.
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {}

// Presumably applies the "Emscripten exception handling" lowering from the
// file header (invokes, landingpads, resume, llvm.eh.typeid.for) to F and
// returns whether F was changed.
// NOTE(review): the body is elided in this view — confirm.
bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {}

// Picks a debug location for an instruction that is about to be inserted
// before InsertBefore. It first tries the debug info of InsertBefore itself;
// if that instruction has none, it tries the previous instruction (if any).
// If neither has debug info and a DISubprogram SP is provided, it creates a
// dummy debug location at the first line of the function, because the IR
// verifier requires every inlinable callsite to have debug info when both the
// caller and the callee have a DISubprogram. If none of these conditions are
// met, it returns an empty location.
static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
                                    DISubprogram *SP) {}

// Presumably applies the setjmp/longjmp lowering from the file header
// (Emscripten SjLj or Wasm SjLj) to F and returns whether F was changed.
// NOTE(review): the body is elided in this view — confirm.
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {}

// Updates each call in F that can longjmp so that it can return to the
// corresponding setjmp. Refer to 4) of the "Emscripten setjmp / longjmp
// handling" section in the comments at the top of the file for details.
// NOTE(review): FunctionInvocationId is presumably the per-invocation alloca
// from step 2) of that section, and SetjmpRetPHIs presumably the PHIs holding
// setjmp's return value in the post-setjmp blocks — confirm.
void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
    Function &F, Instruction *FunctionInvocationId,
    SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {}

// Presumably returns the unwind destination block of the cleanupret associated
// with the cleanuppad CPI.
// NOTE(review): inferred from the name; the body is elided in this view, so
// the result when no such cleanupret exists cannot be stated — confirm.
static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) {}

// Creates a catchpad in F in which a longjmp's env and val arguments are
// caught. The catchpad tests whether the longjmp corresponds to one of the
// setjmps in the current function and, if so, jumps to the setjmp dispatch BB,
// from which control goes to one of the post-setjmp BBs. Refer to 4) of the
// "Wasm setjmp / longjmp handling" section in the comments at the top of the
// file for details.
// NOTE(review): FunctionInvocationId is presumably the per-invocation alloca
// passed to __wasm_setjmp_test, and SetjmpRetPHIs presumably the PHIs holding
// setjmp's return value in the post-setjmp blocks — confirm.
void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
    Function &F, Instruction *FunctionInvocationId,
    SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {}