MveEmitter.cpp | Explore in Territory

//===- MveEmitter.cpp - Generate arm_mve.h for use with clang -*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This set of linked tablegen backends is responsible for emitting the bits
// and pieces that implement <arm_mve.h>, which is defined by the ACLE standard
// and provides a set of types and functions for (more or less) direct access
// to the MVE instruction set, including the scalar shifts as well as the
// vector instructions.
//
// MVE's standard intrinsic functions are unusual in that they have a system of
// polymorphism. For example, the function vaddq() can behave like vaddq_u16(),
// vaddq_f32(), vaddq_s8(), etc., depending on the types of the vector
// arguments you give it.
//
// This constrains the implementation strategies. The usual approach to making
// the user-facing functions polymorphic would be to either use
// __attribute__((overloadable)) to make a set of vaddq() functions that are
// all inline wrappers on the underlying clang builtins, or to define a single
// vaddq() macro which expands to an instance of _Generic.
//
// The inline-wrappers approach would work fine for most intrinsics, except for
// the ones that take an argument required to be a compile-time constant,
// because if you wrap an inline function around a call to a builtin, the
// constant nature of the argument is not passed through.
//
// The _Generic approach can be made to work with enough effort, but it takes a
// lot of machinery, because of the design feature of _Generic that even the
// untaken branches are required to pass all front-end validity checks such as
// type-correctness. You can work around that by nesting further _Generics all
// over the place to coerce things to the right type in untaken branches, but
// what you get out is complicated, hard to guarantee its correctness, and
// worst of all, gives _completely unreadable_ error messages if the user gets
// the types wrong for an intrinsic call.
//
// Therefore, my strategy is to introduce a new __attribute__ that allows a
// function to be mapped to a clang builtin even though it doesn't have the
// same name, and then declare all the user-facing MVE function names with that
// attribute, mapping each one directly to the clang builtin. And the
// polymorphic ones have __attribute__((overloadable)) as well. So once the
// compiler has resolved the overload, it knows the internal builtin ID of the
// selected function, and can check the immediate arguments against that; and
// if the user gets the types wrong in a call to a polymorphic intrinsic, they
// get a completely clear error message showing all the declarations of that
// function in the header file and explaining why each one doesn't fit their
// call.
//
// The downside of this is that if every clang builtin has to correspond
// exactly to a user-facing ACLE intrinsic, then you can't save work in the
// frontend by doing it in the header file: CGBuiltin.cpp has to do the entire
// job of converting an ACLE intrinsic call into LLVM IR. So the Tablegen
// description for an MVE intrinsic has to contain a full description of the
// sequence of IRBuilder calls that clang will need to make.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

usingnamespacellvm;

namespace {

class EmitterBase;
class Result;

// -----------------------------------------------------------------------------
// A system of classes to represent all the types we'll need to deal with in
// the prototypes of intrinsics.
//
// Query methods include finding out the C name of a type; the "LLVM name" in
// the sense of a C++ code snippet that can be used in the codegen function;
// the suffix that represents the type in the ACLE intrinsic naming scheme
// (e.g. 's32' represents int32_t in intrinsics such as vaddq_s32); whether the
// type is floating-point related (hence should be under #ifdef in the MVE
// header so that it isn't included in integer-only MVE mode); and the type's
// size in bits. Not all subtypes support all these queries.

class Type { … };

enum class ScalarTypeKind { … };
inline std::string toLetter(ScalarTypeKind kind) { … }
inline std::string toCPrefix(ScalarTypeKind kind) { … }

class VoidType : public Type { … };

class PointerType : public Type { … };

// Base class for all the types that have a name of the form
// [prefix][numbers]_t, like int32_t, uint16x8_t, float32x4x2_t.
//
// For this sub-hierarchy we invent a cNameBase() method which returns the
// whole name except for the trailing "_t", so that Vector and MultiVector can
// append an extra "x2" or whatever to their element type's cNameBase(). Then
// the main cName() query method puts "_t" on the end for the final type name.

class CRegularNamedType : public Type { … };

class ScalarType : public CRegularNamedType { … };

class VectorType : public CRegularNamedType { … };

class MultiVectorType : public CRegularNamedType { … };

class PredicateType : public CRegularNamedType { … };

// -----------------------------------------------------------------------------
// Class to facilitate merging together the code generation for many intrinsics
// by means of varying a few constant or type parameters.
//
// Most obviously, the intrinsics in a single parametrised family will have
// code generation sequences that only differ in a type or two, e.g. vaddq_s8
// and vaddq_u16 will look the same apart from putting a different vector type
// in the call to CGM.getIntrinsic(). But also, completely different intrinsics
// will often code-generate in the same way, with only a different choice of
// _which_ IR intrinsic they lower to (e.g. vaddq_m_s8 and vmulq_m_s8), but
// marshalling the arguments and return values of the IR intrinsic in exactly
// the same way. And others might differ only in some other kind of constant,
// such as a lane index.
//
// So, when we generate the IR-building code for all these intrinsics, we keep
// track of every value that could possibly be pulled out of the code and
// stored ahead of time in a local variable. Then we group together intrinsics
// by textual equivalence of the code that would result if _all_ those
// parameters were stored in local variables. That gives us maximal sets that
// can be implemented by a single piece of IR-building code by changing
// parameter values ahead of time.
//
// After we've done that, we do a second pass in which we only allocate _some_
// of the parameters into local variables, by tracking which ones have the same
// values as each other (so that a single variable can be reused) and which
// ones are the same across the whole set (so that no variable is needed at
// all).
//
// Hence the class below. Its allocParam method is invoked during code
// generation by every method of a Result subclass (see below) that wants to
// give it the opportunity to pull something out into a switchable parameter.
// It returns a variable name for the parameter, or (if it's being used in the
// second pass once we've decided that some parameters don't need to be stored
// in variables after all) it might just return the input expression unchanged.

struct CodeGenParamAllocator { … };

// -----------------------------------------------------------------------------
// System of classes that represent all the intermediate values used during
// code-generation for an intrinsic.
//
// The base class 'Result' can represent a value of the LLVM type 'Value', or
// sometimes 'Address' (for loads/stores, including an alignment requirement).
//
// In the case where the Tablegen provides a value in the codegen dag as a
// plain integer literal, the Result object we construct here will be one that
// returns true from hasIntegerConstantValue(). This allows the generated C++
// code to use the constant directly in contexts which can take a literal
// integer, such as Builder.CreateExtractValue(thing, 1), without going to the
// effort of calling llvm::ConstantInt::get() and then pulling the constant
// back out of the resulting llvm:Value later.

class Result { … };

// Result subclass that retrieves one of the arguments to the clang builtin
// function. In cases where the argument has pointer type, we call
// EmitPointerWithAlignment and store the result in a variable of type Address,
// so that load and store IR nodes can know the right alignment. Otherwise, we
// call EmitScalarExpr.
//
// There are aggregate parameters in the MVE intrinsics API, but we don't deal
// with them in this Tablegen back end: they only arise in the vld2q/vld4q and
// vst2q/vst4q family, which is few enough that we just write the code by hand
// for those in CGBuiltin.cpp.
class BuiltinArgResult : public Result { … };

// Result subclass for an integer literal appearing in Tablegen. This may need
// to be turned into an llvm::Result by means of llvm::ConstantInt::get(), or
// it may be used directly as an integer, depending on which IRBuilder method
// it's being passed to.
class IntLiteralResult : public Result { … };

// Result subclass representing a cast between different integer types. We use
// our own ScalarType abstraction as the representation of the target type,
// which gives both size and signedness.
class IntCastResult : public Result { … };

// Result subclass representing a cast between different pointer types.
class PointerCastResult : public Result { … };

// Result subclass representing a call to an IRBuilder method. Each IRBuilder
// method we want to use will have a Tablegen record giving the method name and
// describing any important details of how to call it, such as whether a
// particular argument should be an integer constant instead of an llvm::Value.
class IRBuilderResult : public Result { … };

// Result subclass representing making an Address out of a Value.
class AddressResult : public Result { … };

// Result subclass representing a call to an IR intrinsic, which we first have
// to look up using an Intrinsic::ID constant and an array of types.
class IRIntrinsicResult : public Result { … };

// Result subclass that specifies a type, for use in IRBuilder operations such
// as CreateBitCast that take a type argument.
class TypeResult : public Result { … };

// -----------------------------------------------------------------------------
// Class that describes a single ACLE intrinsic.
//
// A Tablegen record will typically describe more than one ACLE intrinsic, by
// means of setting the 'list<Type> Params' field to a list of multiple
// parameter types, so as to define vaddq_{s8,u8,...,f16,f32} all in one go.
// We'll end up with one instance of ACLEIntrinsic for *each* parameter type,
// rather than a single one for all of them. Hence, the constructor takes both
// a Tablegen record and the current value of the parameter type.

class ACLEIntrinsic { … };

// -----------------------------------------------------------------------------
// The top-level class that holds all the state from analyzing the entire
// Tablegen input.

class EmitterBase { … };

const Type *EmitterBase::getType(Init *I, const Type *Param) { … }

const Type *EmitterBase::getType(const Record *R, const Type *Param) { … }

const Type *EmitterBase::getType(DagInit *D, const Type *Param) { … }

Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope,
                                       const Type *Param) { … }

Result::Ptr EmitterBase::getCodeForDagArg(DagInit *D, unsigned ArgNum,
                                          const Result::Scope &Scope,
                                          const Type *Param) { … }

Result::Ptr EmitterBase::getCodeForArg(unsigned ArgNum, const Type *ArgType,
                                       bool Promote, bool Immediate) { … }

ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, const Record *R,
                             const Type *Param)
    : … { … }

EmitterBase::EmitterBase(const RecordKeeper &Records) { … }

/// A wrapper on raw_string_ostream that contains its own buffer rather than
/// having to point it at one elsewhere. (In other words, it works just like
/// std::ostringstream; also, this makes it convenient to declare a whole array
/// of them at once.)
///
/// We have to set this up using multiple inheritance, to ensure that the
/// string member has been constructed before raw_string_ostream's constructor
/// is given a pointer to it.
class string_holder { … };
class raw_self_contained_string_ostream : private string_holder,
                                          public raw_string_ostream { … };

const char LLVMLicenseHeader[] = …;

// Machinery for the grouping of intrinsics by similar codegen.
//
// The general setup is that 'MergeableGroup' stores the things that a set of
// similarly shaped intrinsics have in common: the text of their code
// generation, and the number and type of their parameter variables.
// MergeableGroup is the key in a std::map whose value is a set of
// OutputIntrinsic, which stores the ways in which a particular intrinsic
// specializes the MergeableGroup's generic description: the function name and
// the _values_ of the parameter variables.

struct ComparableStringVector : std::vector<std::string> { … };

struct OutputIntrinsic { … };
struct MergeableGroup { … };

void EmitterBase::EmitBuiltinCG(raw_ostream &OS) { … }

void EmitterBase::EmitBuiltinAliases(raw_ostream &OS) { … }

void EmitterBase::GroupSemaChecks(
    std::map<std::string, std::set<std::string>> &Checks) { … }

// -----------------------------------------------------------------------------
// The class used for generating arm_mve.h and related Clang bits
//

class MveEmitter : public EmitterBase { … };

void MveEmitter::EmitHeader(raw_ostream &OS) { … }

void MveEmitter::EmitBuiltinDef(raw_ostream &OS) { … }

void MveEmitter::EmitBuiltinSema(raw_ostream &OS) { … }

// -----------------------------------------------------------------------------
// Class that describes an ACLE intrinsic implemented as a macro.
//
// This class is used when the intrinsic is polymorphic in 2 or 3 types, but we
// want to avoid a combinatorial explosion by reinterpreting the arguments to
// fixed types.

class FunctionMacro { … };

FunctionMacro::FunctionMacro(const Record &R) { … }

// -----------------------------------------------------------------------------
// The class used for generating arm_cde.h and related Clang bits
//

class CdeEmitter : public EmitterBase { … };

CdeEmitter::CdeEmitter(const RecordKeeper &Records) : … { … }

void CdeEmitter::EmitHeader(raw_ostream &OS) { … }

void CdeEmitter::EmitBuiltinDef(raw_ostream &OS) { … }

void CdeEmitter::EmitBuiltinSema(raw_ostream &OS) { … }

} // namespace

namespace clang {

// MVE

void EmitMveHeader(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitMveBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitMveBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitMveBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitMveBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { … }

// CDE

void EmitCdeHeader(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitCdeBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitCdeBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitCdeBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { … }

void EmitCdeBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { … }

} // end namespace clang
llvm/clang/utils/TableGen/MveEmitter.cpp