//===- MveEmitter.cpp - Generate arm_mve.h for use with clang -*- C++ -*-=====// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This set of linked tablegen backends is responsible for emitting the bits // and pieces that implement <arm_mve.h>, which is defined by the ACLE standard // and provides a set of types and functions for (more or less) direct access // to the MVE instruction set, including the scalar shifts as well as the // vector instructions. // // MVE's standard intrinsic functions are unusual in that they have a system of // polymorphism. For example, the function vaddq() can behave like vaddq_u16(), // vaddq_f32(), vaddq_s8(), etc., depending on the types of the vector // arguments you give it. // // This constrains the implementation strategies. The usual approach to making // the user-facing functions polymorphic would be to either use // __attribute__((overloadable)) to make a set of vaddq() functions that are // all inline wrappers on the underlying clang builtins, or to define a single // vaddq() macro which expands to an instance of _Generic. // // The inline-wrappers approach would work fine for most intrinsics, except for // the ones that take an argument required to be a compile-time constant, // because if you wrap an inline function around a call to a builtin, the // constant nature of the argument is not passed through. // // The _Generic approach can be made to work with enough effort, but it takes a // lot of machinery, because of the design feature of _Generic that even the // untaken branches are required to pass all front-end validity checks such as // type-correctness. 
You can work around that by nesting further _Generics all // over the place to coerce things to the right type in untaken branches, but // what you get out is complicated, hard to guarantee its correctness, and // worst of all, gives _completely unreadable_ error messages if the user gets // the types wrong for an intrinsic call. // // Therefore, my strategy is to introduce a new __attribute__ that allows a // function to be mapped to a clang builtin even though it doesn't have the // same name, and then declare all the user-facing MVE function names with that // attribute, mapping each one directly to the clang builtin. And the // polymorphic ones have __attribute__((overloadable)) as well. So once the // compiler has resolved the overload, it knows the internal builtin ID of the // selected function, and can check the immediate arguments against that; and // if the user gets the types wrong in a call to a polymorphic intrinsic, they // get a completely clear error message showing all the declarations of that // function in the header file and explaining why each one doesn't fit their // call. // // The downside of this is that if every clang builtin has to correspond // exactly to a user-facing ACLE intrinsic, then you can't save work in the // frontend by doing it in the header file: CGBuiltin.cpp has to do the entire // job of converting an ACLE intrinsic call into LLVM IR. So the Tablegen // description for an MVE intrinsic has to contain a full description of the // sequence of IRBuilder calls that clang will need to make. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/APInt.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringToOffsetTable.h" #include <cassert> #include <cstddef> #include <cstdint> #include <list> #include <map> #include <memory> #include <set> #include <string> #include <vector> using namespace llvm; namespace { class EmitterBase; class Result; // ----------------------------------------------------------------------------- // A system of classes to represent all the types we'll need to deal with in // the prototypes of intrinsics. // // Query methods include finding out the C name of a type; the "LLVM name" in // the sense of a C++ code snippet that can be used in the codegen function; // the suffix that represents the type in the ACLE intrinsic naming scheme // (e.g. 's32' represents int32_t in intrinsics such as vaddq_s32); whether the // type is floating-point related (hence should be under #ifdef in the MVE // header so that it isn't included in integer-only MVE mode); and the type's // size in bits. Not all subtypes support all these queries. class Type { … }; enum class ScalarTypeKind { … }; inline std::string toLetter(ScalarTypeKind kind) { … } inline std::string toCPrefix(ScalarTypeKind kind) { … } class VoidType : public Type { … }; class PointerType : public Type { … }; // Base class for all the types that have a name of the form // [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t. // // For this sub-hierarchy we invent a cNameBase() method which returns the // whole name except for the trailing "_t", so that Vector and MultiVector can // append an extra "x2" or whatever to their element type's cNameBase(). Then // the main cName() query method puts "_t" on the end for the final type name. 
class CRegularNamedType : public Type { … }; class ScalarType : public CRegularNamedType { … }; class VectorType : public CRegularNamedType { … }; class MultiVectorType : public CRegularNamedType { … }; class PredicateType : public CRegularNamedType { … }; // ----------------------------------------------------------------------------- // Class to facilitate merging together the code generation for many intrinsics // by means of varying a few constant or type parameters. // // Most obviously, the intrinsics in a single parametrised family will have // code generation sequences that only differ in a type or two, e.g. vaddq_s8 // and vaddq_u16 will look the same apart from putting a different vector type // in the call to CGM.getIntrinsic(). But also, completely different intrinsics // will often code-generate in the same way, with only a different choice of // _which_ IR intrinsic they lower to (e.g. vaddq_m_s8 and vmulq_m_s8), but // marshalling the arguments and return values of the IR intrinsic in exactly // the same way. And others might differ only in some other kind of constant, // such as a lane index. // // So, when we generate the IR-building code for all these intrinsics, we keep // track of every value that could possibly be pulled out of the code and // stored ahead of time in a local variable. Then we group together intrinsics // by textual equivalence of the code that would result if _all_ those // parameters were stored in local variables. That gives us maximal sets that // can be implemented by a single piece of IR-building code by changing // parameter values ahead of time. // // After we've done that, we do a second pass in which we only allocate _some_ // of the parameters into local variables, by tracking which ones have the same // values as each other (so that a single variable can be reused) and which // ones are the same across the whole set (so that no variable is needed at // all). // // Hence the class below. 
Its allocParam method is invoked during code // generation by every method of a Result subclass (see below) that wants to // give it the opportunity to pull something out into a switchable parameter. // It returns a variable name for the parameter, or (if it's being used in the // second pass once we've decided that some parameters don't need to be stored // in variables after all) it might just return the input expression unchanged. struct CodeGenParamAllocator { … }; // ----------------------------------------------------------------------------- // System of classes that represent all the intermediate values used during // code-generation for an intrinsic. // // The base class 'Result' can represent a value of the LLVM type 'Value', or // sometimes 'Address' (for loads/stores, including an alignment requirement). // // In the case where the Tablegen provides a value in the codegen dag as a // plain integer literal, the Result object we construct here will be one that // returns true from hasIntegerConstantValue(). This allows the generated C++ // code to use the constant directly in contexts which can take a literal // integer, such as Builder.CreateExtractValue(thing, 1), without going to the // effort of calling llvm::ConstantInt::get() and then pulling the constant // back out of the resulting llvm::Value later. class Result { … }; // Result subclass that retrieves one of the arguments to the clang builtin // function. In cases where the argument has pointer type, we call // EmitPointerWithAlignment and store the result in a variable of type Address, // so that load and store IR nodes can know the right alignment. Otherwise, we // call EmitScalarExpr. // // There are aggregate parameters in the MVE intrinsics API, but we don't deal // with them in this Tablegen back end: they only arise in the vld2q/vld4q and // vst2q/vst4q family, which is few enough that we just write the code by hand // for those in CGBuiltin.cpp. 
class BuiltinArgResult : public Result { … }; // Result subclass for an integer literal appearing in Tablegen. This may need // to be turned into an llvm::Value by means of llvm::ConstantInt::get(), or // it may be used directly as an integer, depending on which IRBuilder method // it's being passed to. class IntLiteralResult : public Result { … }; // Result subclass representing a cast between different integer types. We use // our own ScalarType abstraction as the representation of the target type, // which gives both size and signedness. class IntCastResult : public Result { … }; // Result subclass representing a cast between different pointer types. class PointerCastResult : public Result { … }; // Result subclass representing a call to an IRBuilder method. Each IRBuilder // method we want to use will have a Tablegen record giving the method name and // describing any important details of how to call it, such as whether a // particular argument should be an integer constant instead of an llvm::Value. class IRBuilderResult : public Result { … }; // Result subclass representing making an Address out of a Value. class AddressResult : public Result { … }; // Result subclass representing a call to an IR intrinsic, which we first have // to look up using an Intrinsic::ID constant and an array of types. class IRIntrinsicResult : public Result { … }; // Result subclass that specifies a type, for use in IRBuilder operations such // as CreateBitCast that take a type argument. class TypeResult : public Result { … }; // ----------------------------------------------------------------------------- // Class that describes a single ACLE intrinsic. // // A Tablegen record will typically describe more than one ACLE intrinsic, by // means of setting the 'list<Type> Params' field to a list of multiple // parameter types, so as to define vaddq_{s8,u8,...,f16,f32} all in one go. 
// We'll end up with one instance of ACLEIntrinsic for *each* parameter type, // rather than a single one for all of them. Hence, the constructor takes both // a Tablegen record and the current value of the parameter type. class ACLEIntrinsic { … }; // ----------------------------------------------------------------------------- // The top-level class that holds all the state from analyzing the entire // Tablegen input. class EmitterBase { … }; const Type *EmitterBase::getType(Init *I, const Type *Param) { … } const Type *EmitterBase::getType(const Record *R, const Type *Param) { … } const Type *EmitterBase::getType(DagInit *D, const Type *Param) { … } Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, const Type *Param) { … } Result::Ptr EmitterBase::getCodeForDagArg(DagInit *D, unsigned ArgNum, const Result::Scope &Scope, const Type *Param) { … } Result::Ptr EmitterBase::getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote, bool Immediate) { … } ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, const Record *R, const Type *Param) : … { … } EmitterBase::EmitterBase(const RecordKeeper &Records) { … } /// A wrapper on raw_string_ostream that contains its own buffer rather than /// having to point it at one elsewhere. (In other words, it works just like /// std::ostringstream; also, this makes it convenient to declare a whole array /// of them at once.) /// /// We have to set this up using multiple inheritance, to ensure that the /// string member has been constructed before raw_string_ostream's constructor /// is given a pointer to it. class string_holder { … }; class raw_self_contained_string_ostream : private string_holder, public raw_string_ostream { … }; const char LLVMLicenseHeader[] = …; // Machinery for the grouping of intrinsics by similar codegen. 
// // The general setup is that 'MergeableGroup' stores the things that a set of // similarly shaped intrinsics have in common: the text of their code // generation, and the number and type of their parameter variables. // MergeableGroup is the key in a std::map whose value is a set of // OutputIntrinsic, which stores the ways in which a particular intrinsic // specializes the MergeableGroup's generic description: the function name and // the _values_ of the parameter variables. struct ComparableStringVector : std::vector<std::string> { … }; struct OutputIntrinsic { … }; struct MergeableGroup { … }; void EmitterBase::EmitBuiltinCG(raw_ostream &OS) { … } void EmitterBase::EmitBuiltinAliases(raw_ostream &OS) { … } void EmitterBase::GroupSemaChecks( std::map<std::string, std::set<std::string>> &Checks) { … } // ----------------------------------------------------------------------------- // The class used for generating arm_mve.h and related Clang bits // class MveEmitter : public EmitterBase { … }; void MveEmitter::EmitHeader(raw_ostream &OS) { … } void MveEmitter::EmitBuiltinDef(raw_ostream &OS) { … } void MveEmitter::EmitBuiltinSema(raw_ostream &OS) { … } // ----------------------------------------------------------------------------- // Class that describes an ACLE intrinsic implemented as a macro. // // This class is used when the intrinsic is polymorphic in 2 or 3 types, but we // want to avoid a combinatorial explosion by reinterpreting the arguments to // fixed types. 
class FunctionMacro { … }; FunctionMacro::FunctionMacro(const Record &R) { … } // ----------------------------------------------------------------------------- // The class used for generating arm_cde.h and related Clang bits // class CdeEmitter : public EmitterBase { … }; CdeEmitter::CdeEmitter(const RecordKeeper &Records) : … { … } void CdeEmitter::EmitHeader(raw_ostream &OS) { … } void CdeEmitter::EmitBuiltinDef(raw_ostream &OS) { … } void CdeEmitter::EmitBuiltinSema(raw_ostream &OS) { … } } // namespace namespace clang { // MVE void EmitMveHeader(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitMveBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitMveBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitMveBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitMveBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { … } // CDE void EmitCdeHeader(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitCdeBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitCdeBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitCdeBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { … } void EmitCdeBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { … } } // end namespace clang