llvm/llvm/utils/TableGen/AsmMatcherEmitter.cpp

//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits a target specifier matcher for converting parsed
// assembly operands in the MCInst structures. It also emits a matcher for
// custom operand parsing.
//
// Converting assembly operands into MCInst structures
// ---------------------------------------------------
//
// The input to the target specific matcher is a list of literal tokens and
// operands. The target specific parser should generally eliminate any syntax
// which is not relevant for matching; for example, comma tokens should have
// already been consumed and eliminated by the parser. Most instructions will
// end up with a single literal token (the instruction name) and some number of
// operands.
//
// Some example inputs, for X86:
//   'addl' (immediate ...) (register ...)
//   'add' (immediate ...) (memory ...)
//   'call' '*' %epc
//
// The assembly matcher is responsible for converting this input into a precise
// machine instruction (i.e., an instruction with a well defined encoding). This
// mapping has several properties which complicate matching:
//
//  - It may be ambiguous; many architectures can legally encode particular
//    variants of an instruction in different ways (for example, using a smaller
//    encoding for small immediates). Such ambiguities should never be
//    arbitrarily resolved by the assembler, the assembler is always responsible
//    for choosing the "best" available instruction.
//
//  - It may depend on the subtarget or the assembler context. Instructions
//    which are invalid for the current mode, but otherwise unambiguous (e.g.,
//    an SSE instruction in a file being assembled for i486) should be accepted
//    and rejected by the assembler front end. However, if the proper encoding
//    for an instruction is dependent on the assembler context then the matcher
//    is responsible for selecting the correct machine instruction for the
//    current mode.
//
// The core matching algorithm attempts to exploit the regularity in most
// instruction sets to quickly determine the set of possibly matching
// instructions, and the simplify the generated code. Additionally, this helps
// to ensure that the ambiguities are intentionally resolved by the user.
//
// The matching is divided into two distinct phases:
//
//   1. Classification: Each operand is mapped to the unique set which (a)
//      contains it, and (b) is the largest such subset for which a single
//      instruction could match all members.
//
//      For register classes, we can generate these subgroups automatically. For
//      arbitrary operands, we expect the user to define the classes and their
//      relations to one another (for example, 8-bit signed immediates as a
//      subset of 32-bit immediates).
//
//      By partitioning the operands in this way, we guarantee that for any
//      tuple of classes, any single instruction must match either all or none
//      of the sets of operands which could classify to that tuple.
//
//      In addition, the subset relation amongst classes induces a partial order
//      on such tuples, which we use to resolve ambiguities.
//
//   2. The input can now be treated as a tuple of classes (static tokens are
//      simple singleton sets). Each such tuple should generally map to a single
//      instruction (we currently ignore cases where this isn't true, whee!!!),
//      which we can emit a simple matcher for.
//
// Custom Operand Parsing
// ----------------------
//
//  Some targets need a custom way to parse operands, some specific instructions
//  can contain arguments that can represent processor flags and other kinds of
//  identifiers that need to be mapped to specific values in the final encoded
//  instructions. The target specific custom operand parsing works in the
//  following way:
//
//   1. A operand match table is built, each entry contains a mnemonic, an
//      operand class, a mask for all operand positions for that same
//      class/mnemonic and target features to be checked while trying to match.
//
//   2. The operand matcher will try every possible entry with the same
//      mnemonic and will check if the target feature for this mnemonic also
//      matches. After that, if the operand to be matched has its index
//      present in the mask, a successful match occurs. Otherwise, fallback
//      to the regular operand parsing.
//
//   3. For a match success, each operand class that has a 'ParserMethod'
//      becomes part of a switch from where the custom method is called.
//
//===----------------------------------------------------------------------===//

#include "Common/CodeGenInstAlias.h"
#include "Common/CodeGenInstruction.h"
#include "Common/CodeGenRegisters.h"
#include "Common/CodeGenTarget.h"
#include "Common/SubtargetFeatureInfo.h"
#include "Common/Types.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cctype>
#include <forward_list>
#include <map>
#include <set>

usingnamespacellvm;

#define DEBUG_TYPE

cl::OptionCategory AsmMatcherEmitterCat("Options for -gen-asm-matcher");

static cl::opt<std::string>
    MatchPrefix("match-prefix", cl::init(""),
                cl::desc("Only match instructions with the given prefix"),
                cl::cat(AsmMatcherEmitterCat));

namespace {
class AsmMatcherInfo;

// Register sets are used as keys in some second-order sets TableGen creates
// when generating its data structures. This means that the order of two
// RegisterSets can be seen in the outputted AsmMatcher tables occasionally, and
// can even affect compiler output (at least seen in diagnostics produced when
// all matches fail). So we use a type that sorts them consistently.
RegisterSet;

class AsmMatcherEmitter {};

/// ClassInfo - Helper class for storing the information about a particular
/// class of operands which can be matched.
struct ClassInfo {};

class AsmVariantInfo {};

bool getPreferSmallerInstructions(CodeGenTarget const &Target) {}

/// MatchableInfo - Helper class for storing the necessary information for an
/// instruction or alias which is capable of being matched.
struct MatchableInfo {};

struct OperandMatchEntry {};

class AsmMatcherInfo {};

} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MatchableInfo::dump() const {
  errs() << TheDef->getName() << " -- "
         << "flattened:\"" << AsmString << "\"\n";

  errs() << "  variant: " << AsmVariantID << "\n";

  for (const auto &[Idx, Op] : enumerate(AsmOperands)) {
    errs() << "  op[" << Idx << "] = " << Op.Class->ClassName << " - ";
    errs() << '\"' << Op.Token << "\"\n";
  }
}
#endif

static std::pair<StringRef, StringRef>
parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) {}

void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {}

/// extractSingletonRegisterForAsmOperand - Extract singleton register,
/// if present, from specified token.
static void extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op,
                                                  const AsmMatcherInfo &Info,
                                                  StringRef RegisterPrefix) {}

void MatchableInfo::initialize(
    const AsmMatcherInfo &Info,
    SmallPtrSetImpl<const Record *> &SingletonRegisters,
    AsmVariantInfo const &Variant, bool HasMnemonicFirst) {}

/// Append an AsmOperand for the given substring of AsmString.
void MatchableInfo::addAsmOperand(StringRef Token, bool IsIsolatedToken) {}

/// tokenizeAsmString - Tokenize a simplified assembly string.
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
                                      AsmVariantInfo const &Variant) {}

bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {}

static std::string getEnumNameForToken(StringRef Str) {}

ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) {}

ClassInfo *
AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
                                int SubOpIdx) {}

ClassInfo *AsmMatcherInfo::getOperandClass(const Record *Rec, int SubOpIdx) {}

struct LessRegisterSet {};

void AsmMatcherInfo::buildRegisterClasses(
    SmallPtrSetImpl<const Record *> &SingletonRegisters) {}

void AsmMatcherInfo::buildOperandClasses() {}

AsmMatcherInfo::AsmMatcherInfo(const Record *asmParser,
                               const CodeGenTarget &target,
                               const RecordKeeper &records)
    :{}

/// buildOperandMatchInfo - Build the necessary information to handle user
/// defined operand parsing methods.
void AsmMatcherInfo::buildOperandMatchInfo() {}

void AsmMatcherInfo::buildInfo() {}

/// buildInstructionOperandReference - The specified operand is a reference to a
/// named operand such as $src.  Resolve the Class and OperandInfo pointers.
void AsmMatcherInfo::buildInstructionOperandReference(MatchableInfo *II,
                                                      StringRef OperandName,
                                                      unsigned AsmOpIdx) {}

/// buildAliasOperandReference - When parsing an operand reference out of the
/// matching string (e.g. "movsx $src, $dst"), determine what the class of the
/// operand reference is by looking it up in the result pattern definition.
void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II,
                                                StringRef OperandName,
                                                MatchableInfo::AsmOperand &Op) {}

void MatchableInfo::buildInstructionResultOperands() {}

void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {}

static unsigned
getConverterOperandID(const std::string &Name,
                      SmallSetVector<CachedHashString, 16> &Table,
                      bool &IsNew) {}

static unsigned
emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
                 std::vector<std::unique_ptr<MatchableInfo>> &Infos,
                 bool HasMnemonicFirst, bool HasOptionalOperands,
                 raw_ostream &OS) {}

/// emitMatchClassEnumeration - Emit the enumeration for match class kinds.
static void emitMatchClassEnumeration(CodeGenTarget &Target,
                                      std::forward_list<ClassInfo> &Infos,
                                      raw_ostream &OS) {}

/// emitMatchClassDiagStrings - Emit a function to get the diagnostic text to be
/// used when an assembly operand does not match the expected operand class.
static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info,
                                             raw_ostream &OS) {}

static void emitRegisterMatchErrorFunc(AsmMatcherInfo &Info, raw_ostream &OS) {}

/// emitValidateOperandClass - Emit the function to validate an operand class.
static void emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) {}

/// emitIsSubclass - Emit the subclass predicate function.
static void emitIsSubclass(CodeGenTarget &Target,
                           std::forward_list<ClassInfo> &Infos,
                           raw_ostream &OS) {}

/// emitMatchTokenString - Emit the function to match a token string to the
/// appropriate match class value.
static void emitMatchTokenString(CodeGenTarget &Target,
                                 std::forward_list<ClassInfo> &Infos,
                                 raw_ostream &OS) {}

/// emitMatchRegisterName - Emit the function to match a string to the target
/// specific register enum.
static void emitMatchRegisterName(const CodeGenTarget &Target,
                                  const Record *AsmParser, raw_ostream &OS) {}

/// Emit the function to match a string to the target
/// specific register enum.
static void emitMatchRegisterAltName(const CodeGenTarget &Target,
                                     const Record *AsmParser, raw_ostream &OS) {}

/// emitOperandDiagnosticTypes - Emit the operand matching diagnostic types.
static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) {}

/// emitGetSubtargetFeatureName - Emit the helper function to get the
/// user-level name for a subtarget feature.
static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) {}

static std::string GetAliasRequiredFeatures(const Record *R,
                                            const AsmMatcherInfo &Info) {}

static void
emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info,
                         ArrayRef<const Record *> Aliases, unsigned Indent = 0,
                         StringRef AsmParserVariantName = StringRef()) {}

/// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
/// emit a function for them and return true, otherwise return false.
static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
                                CodeGenTarget &Target) {}

static void
emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
                         const AsmMatcherInfo &Info, StringRef ClassName,
                         const StringToOffsetTable &StringTable,
                         unsigned MaxMnemonicIndex, unsigned MaxFeaturesIndex,
                         bool HasMnemonicFirst, const Record &AsmParser) {}

static void emitAsmTiedOperandConstraints(CodeGenTarget &Target,
                                          AsmMatcherInfo &Info, raw_ostream &OS,
                                          bool HasOptionalOperands) {}

static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
                                     unsigned VariantCount) {}

static void emitMnemonicChecker(raw_ostream &OS, CodeGenTarget &Target,
                                unsigned VariantCount, bool HasMnemonicFirst,
                                bool HasMnemonicAliases) {}

// Emit a function mapping match classes to strings, for debugging.
static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos,
                                    raw_ostream &OS) {}

static std::string
getNameForFeatureBitset(ArrayRef<const Record *> FeatureBitset) {}

void AsmMatcherEmitter::run(raw_ostream &OS) {}

static TableGen::Emitter::OptClass<AsmMatcherEmitter>
    X("gen-asm-matcher", "Generate assembly instruction matcher");