//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file lowers exception-related instructions and setjmp/longjmp function /// calls to use Emscripten's library functions. The pass uses JavaScript's try /// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in /// case of Wasm SjLj. /// /// * Emscripten exception handling /// This pass lowers invokes and landingpads into library functions in JS glue /// code. Invokes are lowered into function wrappers called invoke wrappers that /// exist in JS side, which wraps the original function call with JS try-catch. /// If an exception occurred, cxa_throw() function in JS side sets some /// variables (see below) so we can check whether an exception occurred from /// wasm code and handle it appropriately. /// /// * Emscripten setjmp-longjmp handling /// This pass lowers setjmp to a reasonably-performant approach for emscripten. /// The idea is that each block with a setjmp is broken up into two parts: the /// part containing setjmp and the part right after the setjmp. The latter part /// is either reached from the setjmp, or later from a longjmp. To handle the /// longjmp, all calls that might longjmp are also called using invoke wrappers /// and thus JS try-catch. JS longjmp() function also sets some variables so /// we can check whether a longjmp occurred from wasm code. Each block with a /// function call that might longjmp is also split up after the longjmp call. /// After the longjmp call, we check whether a longjmp occurred, and if it did, /// which setjmp it corresponds to, and jump to the right post-setjmp block. 
/// We assume setjmp-longjmp handling always runs after EH handling, which means /// we don't expect any exception-related instructions when SjLj runs. /// FIXME Currently this scheme does not support indirect call of setjmp, /// because of the limitation of the scheme itself. fastcomp does not support it /// either. /// /// In detail, this pass does the following things: /// /// 1) Assumes the existence of global variables: __THREW__, __threwValue /// __THREW__ and __threwValue are defined in compiler-rt in Emscripten. /// These variables are used for both exceptions and setjmp/longjmps. /// __THREW__ indicates whether an exception or a longjmp occurred or not. 0 /// means nothing occurred, 1 means an exception occurred, and other numbers /// mean a longjmp occurred. In the case of longjmp, __THREW__ variable /// indicates the corresponding setjmp buffer the longjmp corresponds to. /// __threwValue is 0 for exceptions, and the argument to longjmp in case of /// longjmp. /// /// * Emscripten exception handling /// /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions /// at link time. setThrew exists in Emscripten's compiler-rt: /// /// void setThrew(uintptr_t threw, int value) { /// if (__THREW__ == 0) { /// __THREW__ = threw; /// __threwValue = value; /// } /// } /// /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. /// In exception handling, getTempRet0 indicates the type of an exception /// caught, and in setjmp/longjmp, it means the second argument to longjmp /// function. /// /// 3) Lower /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad /// into /// __THREW__ = 0; /// call @__invoke_SIG(func, arg1, arg2) /// %__THREW__.val = __THREW__; /// __THREW__ = 0; /// if (%__THREW__.val == 1) /// goto %lpad /// else /// goto %invoke.cont /// SIG is a mangled string generated based on the LLVM IR-level function /// signature. 
After LLVM IR types are lowered to the target wasm types, /// the names for these wrappers will change based on wasm types as well, /// as in invoke_vi (function takes an int and returns void). The bodies of /// these wrappers will be generated in JS glue code, and inside those /// wrappers we use JS try-catch to generate actual exception effects. It /// also calls the original callee function. An example wrapper in JS code /// would look like this: /// function invoke_vi(index,a1) { /// try { /// Module["dynCall_vi"](index,a1); // This calls original callee /// } catch(e) { /// if (typeof e !== 'number' && e !== 'longjmp') throw e; /// _setThrew(1, 0); // setThrew is called here /// } /// } /// If an exception is thrown, __THREW__ will be set to 1 in a wrapper, /// so we can jump to the right BB based on this value. /// /// 4) Lower /// %val = landingpad catch c1 catch c2 catch c3 ... /// ... use %val ... /// into /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) /// %val = {%fmc, getTempRet0()} /// ... use %val ... /// Here N is a number calculated based on the number of clauses. /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. /// /// 5) Lower /// resume {%a, %b} /// into /// call @__resumeException(%a) /// where __resumeException() is a function in JS glue code. /// /// 6) Lower /// call @llvm.eh.typeid.for(type) (intrinsic) /// into /// call @llvm_eh_typeid_for(type) /// llvm_eh_typeid_for function will be generated in JS glue code. /// /// * Emscripten setjmp / longjmp handling /// /// If there are calls to longjmp() /// /// 1) Lower /// longjmp(env, val) /// into /// emscripten_longjmp(env, val) /// /// If there are calls to setjmp() /// /// 2) In the function entry that calls setjmp, initialize /// functionInvocationId as follows: /// /// functionInvocationId = alloca(4) /// /// Note: the alloca size is not important as this pointer is /// merely used for pointer comparisons. 
/// /// 3) Lower /// setjmp(env) /// into /// __wasm_setjmp(env, label, functionInvocationId) /// /// __wasm_setjmp records the necessary info (the label and /// functionInvocationId) to the "env". /// A BB with setjmp is split into two after setjmp call in order to /// make the post-setjmp BB the possible destination of longjmp BB. /// /// 4) Lower every call that might longjmp into /// __THREW__ = 0; /// call @__invoke_SIG(func, arg1, arg2) /// %__THREW__.val = __THREW__; /// __THREW__ = 0; /// %__threwValue.val = __threwValue; /// if (%__THREW__.val != 0 & %__threwValue.val != 0) { /// %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); /// if (%label == 0) /// emscripten_longjmp(%__THREW__.val, %__threwValue.val); /// setTempRet0(%__threwValue.val); /// } else { /// %label = -1; /// } /// longjmp_result = getTempRet0(); /// switch %label { /// label 1: goto post-setjmp BB 1 /// label 2: goto post-setjmp BB 2 /// ... /// default: goto split next BB /// } /// /// __wasm_setjmp_test examines the jmp buf to see if it was for a matching /// setjmp call. After calling an invoke wrapper, if a longjmp occurred, /// __THREW__ will be the address of matching jmp_buf buffer and /// __threwValue will be the second argument to longjmp. /// __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp /// callsite. Label 0 means this longjmp buffer does not correspond to one /// of the setjmp callsites in this function, so in this case we just chain /// the longjmp to the caller. Label -1 means no longjmp occurred. /// Otherwise we jump to the right post-setjmp BB based on the label. /// /// * Wasm setjmp / longjmp handling /// This mode still uses some Emscripten library functions but not JavaScript's /// try-catch mechanism. It instead uses Wasm exception handling intrinsics, /// which will be lowered to exception handling instructions. 
/// /// If there are calls to longjmp() /// /// 1) Lower /// longjmp(env, val) /// into /// __wasm_longjmp(env, val) /// /// If there are calls to setjmp() /// /// 2) and 3): The same as 2) and 3) in Emscripten SjLj. /// (functionInvocationId initialization + setjmp callsite transformation) /// /// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value /// thrown by __wasm_longjmp function. In the runtime library, we have an /// equivalent of the following struct: /// /// struct __WasmLongjmpArgs { /// void *env; /// int val; /// }; /// /// The thrown value here is a pointer to the struct. We use this struct to /// transfer two values by throwing a single value. Wasm throw and catch /// instructions are capable of throwing and catching multiple values, but /// it also requires multivalue support that is currently not very reliable. /// TODO Switch to throwing and catching two values without using the struct /// /// All longjmpable function calls will be converted to an invoke that will /// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we /// test the thrown values using __wasm_setjmp_test function as we do for /// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to /// transform every longjmpable callsite into a sequence of code including /// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one /// place, in this catchpad. /// /// After calling __wasm_setjmp_test(), if the longjmp does not /// correspond to one of the setjmps within the current function, it rethrows /// the longjmp by calling __wasm_longjmp(). If it corresponds to one of /// setjmps in the function, we jump to the beginning of the function, which /// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this /// switch is added for every longjmpable callsite; in Wasm SjLj we do this /// only once at the top of the function. 
(after functionInvocationId /// initialization) /// /// Below is the pseudocode for what we have described /// /// entry: /// Initialize functionInvocationId /// /// setjmp.dispatch: /// switch %label { /// label 1: goto post-setjmp BB 1 /// label 2: goto post-setjmp BB 2 /// ... /// default: goto split next BB /// } /// ... /// /// bb: /// invoke void @foo() ;; foo is a longjmpable function /// to label %next unwind label %catch.dispatch.longjmp /// ... /// /// catch.dispatch.longjmp: /// %0 = catchswitch within none [label %catch.longjmp] unwind to caller /// /// catch.longjmp: /// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs /// %env = load 'env' field from __WasmLongjmpArgs /// %val = load 'val' field from __WasmLongjmpArgs /// %label = __wasm_setjmp_test(%env, functionInvocationId); /// if (%label == 0) /// __wasm_longjmp(%env, %val) /// catchret to %setjmp.dispatch /// ///===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" #include <set> usingnamespacellvm; #define DEBUG_TYPE … static cl::list<std::string> EHAllowlist("emscripten-cxx-exceptions-allowed", cl::desc("The list of function names in which Emscripten-style " "exception handling is enabled (see emscripten " "EMSCRIPTEN_CATCHING_ALLOWED options)"), cl::CommaSeparated); namespace { class WebAssemblyLowerEmscriptenEHSjLj final : public 
ModulePass { … }; } // End anonymous namespace char WebAssemblyLowerEmscriptenEHSjLj::ID = …; INITIALIZE_PASS(…) ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() { … } static bool canThrow(const Value *V) { … } // Get a thread-local global variable with the given name. If it doesn't exist // declare it, which will generate an import and assume that it will exist at // link time. static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, WebAssemblyTargetMachine &TM, const char *Name) { … } // Simple function name mangler. // This function simply takes LLVM's string representation of parameter types // and concatenate them with '_'. There are non-alphanumeric characters but llc // is ok with it, and we need to postprocess these names after the lowering // phase anyway. static std::string getSignature(FunctionType *FTy) { … } static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name, Module *M) { … } // Returns an integer type for the target architecture's address space. // i32 for wasm32 and i64 for wasm64. static Type *getAddrIntType(Module *M) { … } // Returns an integer pointer type for the target architecture's address space. // i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr // in address space zero. static Type *getAddrPtrType(Module *M) { … } // Returns an integer whose type is the integer type for the target's address // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the // integer. static Value *getAddrSizeInt(Module *M, uint64_t C) { … } // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. // This is because a landingpad instruction contains two more arguments, a // personality function and a cleanup bit, and __cxa_find_matching_catch_N // functions are named after the number of arguments in the original landingpad // instruction. 
Function * WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, unsigned NumClauses) { … } // Generate invoke wrapper sequence with preamble and postamble // Preamble: // __THREW__ = 0; // Postamble: // %__THREW__.val = __THREW__; __THREW__ = 0; // Returns %__THREW__.val, which indicates whether an exception is thrown (or // whether longjmp occurred), for future use. Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { … } // Get matching invoke wrapper based on callee signature Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { … } static bool canLongjmp(const Value *Callee) { … } static bool isEmAsmCall(const Value *Callee) { … } // Generate __wasm_setjmp_test function call sequence with preamble and // postamble. The code this generates is equivalent to the following // JavaScript code: // %__threwValue.val = __threwValue; // if (%__THREW__.val != 0 & %__threwValue.val != 0) { // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); // if (%label == 0) // emscripten_longjmp(%__THREW__.val, %__threwValue.val); // setTempRet0(%__threwValue.val); // } else { // %label = -1; // } // %longjmp_result = getTempRet0(); // // As output parameters, returns %label, %longjmp_result, and the BB the last // instruction (%longjmp_result = ...) is in. void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId, Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI, BasicBlock *&EndBB) { … } void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { … } // Replace uses of longjmp with a new longjmp function in Emscripten library. 
// In Emscripten SjLj, the new function is // void emscripten_longjmp(uintptr_t, i32) // In Wasm SjLj, the new function is // void __wasm_longjmp(i8*, i32) // Because the original libc longjmp function takes (jmp_buf*, i32), we need a // ptrtoint/bitcast instruction here to make the type match. jmp_buf* will // eventually be lowered to i32/i64 in the wasm backend. void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF, Function *NewF) { … } static bool containsLongjmpableCalls(const Function *F) { … } // When a function contains a setjmp call but not other calls that can longjmp, // we don't do setjmp transformation for that setjmp. But we need to convert the // setjmp calls into "i32 0" so they don't cause link time errors. setjmp always // returns 0 when called directly. static void nullifySetjmp(Function *F) { … } bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { … } bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { … } // This tries to get debug info from the instruction before which a new // instruction will be inserted, and if there's no debug info in that // instruction, tries to get the info instead from the previous instruction (if // any). If none of these has debug info and a DISubprogram is provided, it // creates a dummy debug info with the first line of the function, because IR // verifier requires all inlinable callsites should have debug info when both a // caller and callee have DISubprogram. If none of these conditions are met, // returns empty info. static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, DISubprogram *SP) { … } bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { … } // Update each call that can longjmp so it can return to the corresponding // setjmp. Refer to 4) of "Emscripten setjmp/longjmp handling" section in the // comments at top of the file for details. 
void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( Function &F, Instruction *FunctionInvocationId, SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { … } static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) { … } // Create a catchpad in which we catch a longjmp's env and val arguments, test // if the longjmp corresponds to one of setjmps in the current function, and if // so, jump to the setjmp dispatch BB from which we go to one of post-setjmp // BBs. Refer to 4) of "Wasm setjmp/longjmp handling" section in the comments at // top of the file for details. void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( Function &F, Instruction *FunctionInvocationId, SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { … }