//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This tablegen backend emits a target specific matcher for converting parsed // assembly operands into MCInst structures. It also emits a matcher for // custom operand parsing. // // Converting assembly operands into MCInst structures // --------------------------------------------------- // // The input to the target specific matcher is a list of literal tokens and // operands. The target specific parser should generally eliminate any syntax // which is not relevant for matching; for example, comma tokens should have // already been consumed and eliminated by the parser. Most instructions will // end up with a single literal token (the instruction name) and some number of // operands. // // Some example inputs, for X86: // 'addl' (immediate ...) (register ...) // 'add' (immediate ...) (memory ...) // 'call' '*' %epc // // The assembly matcher is responsible for converting this input into a precise // machine instruction (i.e., an instruction with a well defined encoding). This // mapping has several properties which complicate matching: // // - It may be ambiguous; many architectures can legally encode particular // variants of an instruction in different ways (for example, using a smaller // encoding for small immediates). Such ambiguities should never be // arbitrarily resolved by the assembler; the assembler is always responsible // for choosing the "best" available instruction. // // - It may depend on the subtarget or the assembler context. 
Instructions // which are invalid for the current mode, but otherwise unambiguous (e.g., // an SSE instruction in a file being assembled for i486) should be accepted // by the matcher and rejected by the assembler front end. However, if the proper encoding // for an instruction is dependent on the assembler context then the matcher // is responsible for selecting the correct machine instruction for the // current mode. // // The core matching algorithm attempts to exploit the regularity in most // instruction sets to quickly determine the set of possibly matching // instructions, and to simplify the generated code. Additionally, this helps // to ensure that the ambiguities are intentionally resolved by the user. // // The matching is divided into two distinct phases: // // 1. Classification: Each operand is mapped to the unique set which (a) // contains it, and (b) is the largest such subset for which a single // instruction could match all members. // // For register classes, we can generate these subgroups automatically. For // arbitrary operands, we expect the user to define the classes and their // relations to one another (for example, 8-bit signed immediates as a // subset of 32-bit immediates). // // By partitioning the operands in this way, we guarantee that for any // tuple of classes, any single instruction must match either all or none // of the sets of operands which could classify to that tuple. // // In addition, the subset relation amongst classes induces a partial order // on such tuples, which we use to resolve ambiguities. // // 2. The input can now be treated as a tuple of classes (static tokens are // simple singleton sets). Each such tuple should generally map to a single // instruction (we currently ignore cases where this isn't true, whee!!!), // which we can emit a simple matcher for. 
// // Custom Operand Parsing // ---------------------- // // Some targets need a custom way to parse operands; some specific instructions // can contain arguments that can represent processor flags and other kinds of // identifiers that need to be mapped to specific values in the final encoded // instructions. The target specific custom operand parsing works in the // following way: // // 1. An operand match table is built; each entry contains a mnemonic, an // operand class, a mask for all operand positions for that same // class/mnemonic and target features to be checked while trying to match. // // 2. The operand matcher will try every possible entry with the same // mnemonic and will check if the target feature for this mnemonic also // matches. After that, if the operand to be matched has its index // present in the mask, a successful match occurs. Otherwise, fall back // to the regular operand parsing. // // 3. For a match success, each operand class that has a 'ParserMethod' // becomes part of a switch from where the custom method is called. 
// //===----------------------------------------------------------------------===// #include "Common/CodeGenInstAlias.h" #include "Common/CodeGenInstruction.h" #include "Common/CodeGenRegisters.h" #include "Common/CodeGenTarget.h" #include "Common/SubtargetFeatureInfo.h" #include "Common/Types.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" #include <cassert> #include <cctype> #include <forward_list> #include <map> #include <set> usingnamespacellvm; #define DEBUG_TYPE … cl::OptionCategory AsmMatcherEmitterCat("Options for -gen-asm-matcher"); static cl::opt<std::string> MatchPrefix("match-prefix", cl::init(""), cl::desc("Only match instructions with the given prefix"), cl::cat(AsmMatcherEmitterCat)); namespace { class AsmMatcherInfo; // Register sets are used as keys in some second-order sets TableGen creates // when generating its data structures. This means that the order of two // RegisterSets can be seen in the outputted AsmMatcher tables occasionally, and // can even affect compiler output (at least seen in diagnostics produced when // all matches fail). So we use a type that sorts them consistently. RegisterSet; class AsmMatcherEmitter { … }; /// ClassInfo - Helper class for storing the information about a particular /// class of operands which can be matched. 
struct ClassInfo { … }; class AsmVariantInfo { … }; bool getPreferSmallerInstructions(CodeGenTarget const &Target) { … } /// MatchableInfo - Helper class for storing the necessary information for an /// instruction or alias which is capable of being matched. struct MatchableInfo { … }; struct OperandMatchEntry { … }; class AsmMatcherInfo { … }; } // end anonymous namespace #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MatchableInfo::dump() const { errs() << TheDef->getName() << " -- " << "flattened:\"" << AsmString << "\"\n"; errs() << " variant: " << AsmVariantID << "\n"; for (const auto &[Idx, Op] : enumerate(AsmOperands)) { errs() << " op[" << Idx << "] = " << Op.Class->ClassName << " - "; errs() << '\"' << Op.Token << "\"\n"; } } #endif static std::pair<StringRef, StringRef> parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) { … } void MatchableInfo::formTwoOperandAlias(StringRef Constraint) { … } /// extractSingletonRegisterForAsmOperand - Extract singleton register, /// if present, from specified token. static void extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op, const AsmMatcherInfo &Info, StringRef RegisterPrefix) { … } void MatchableInfo::initialize( const AsmMatcherInfo &Info, SmallPtrSetImpl<const Record *> &SingletonRegisters, AsmVariantInfo const &Variant, bool HasMnemonicFirst) { … } /// Append an AsmOperand for the given substring of AsmString. void MatchableInfo::addAsmOperand(StringRef Token, bool IsIsolatedToken) { … } /// tokenizeAsmString - Tokenize a simplified assembly string. 
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, AsmVariantInfo const &Variant) { … } bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const { … } static std::string getEnumNameForToken(StringRef Str) { … } ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) { … } ClassInfo * AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI, int SubOpIdx) { … } ClassInfo *AsmMatcherInfo::getOperandClass(const Record *Rec, int SubOpIdx) { … } struct LessRegisterSet { … }; void AsmMatcherInfo::buildRegisterClasses( SmallPtrSetImpl<const Record *> &SingletonRegisters) { … } void AsmMatcherInfo::buildOperandClasses() { … } AsmMatcherInfo::AsmMatcherInfo(const Record *asmParser, const CodeGenTarget &target, const RecordKeeper &records) : … { … } /// buildOperandMatchInfo - Build the necessary information to handle user /// defined operand parsing methods. void AsmMatcherInfo::buildOperandMatchInfo() { … } void AsmMatcherInfo::buildInfo() { … } /// buildInstructionOperandReference - The specified operand is a reference to a /// named operand such as $src. Resolve the Class and OperandInfo pointers. void AsmMatcherInfo::buildInstructionOperandReference(MatchableInfo *II, StringRef OperandName, unsigned AsmOpIdx) { … } /// buildAliasOperandReference - When parsing an operand reference out of the /// matching string (e.g. "movsx $src, $dst"), determine what the class of the /// operand reference is by looking it up in the result pattern definition. 
void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II, StringRef OperandName, MatchableInfo::AsmOperand &Op) { … } void MatchableInfo::buildInstructionResultOperands() { … } void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) { … } static unsigned getConverterOperandID(const std::string &Name, SmallSetVector<CachedHashString, 16> &Table, bool &IsNew) { … } static unsigned emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName, std::vector<std::unique_ptr<MatchableInfo>> &Infos, bool HasMnemonicFirst, bool HasOptionalOperands, raw_ostream &OS) { … } /// emitMatchClassEnumeration - Emit the enumeration for match class kinds. static void emitMatchClassEnumeration(CodeGenTarget &Target, std::forward_list<ClassInfo> &Infos, raw_ostream &OS) { … } /// emitOperandMatchErrorDiagStrings - Emit a function to get the diagnostic text to be /// used when an assembly operand does not match the expected operand class. static void emitOperandMatchErrorDiagStrings(AsmMatcherInfo &Info, raw_ostream &OS) { … } static void emitRegisterMatchErrorFunc(AsmMatcherInfo &Info, raw_ostream &OS) { … } /// emitValidateOperandClass - Emit the function to validate an operand class. static void emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) { … } /// emitIsSubclass - Emit the subclass predicate function. static void emitIsSubclass(CodeGenTarget &Target, std::forward_list<ClassInfo> &Infos, raw_ostream &OS) { … } /// emitMatchTokenString - Emit the function to match a token string to the /// appropriate match class value. static void emitMatchTokenString(CodeGenTarget &Target, std::forward_list<ClassInfo> &Infos, raw_ostream &OS) { … } /// emitMatchRegisterName - Emit the function to match a string to the target /// specific register enum. static void emitMatchRegisterName(const CodeGenTarget &Target, const Record *AsmParser, raw_ostream &OS) { … } /// Emit the function to match a string to the target /// specific register enum. 
static void emitMatchRegisterAltName(const CodeGenTarget &Target, const Record *AsmParser, raw_ostream &OS) { … } /// emitOperandDiagnosticTypes - Emit the operand matching diagnostic types. static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) { … } /// emitGetSubtargetFeatureName - Emit the helper function to get the /// user-level name for a subtarget feature. static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) { … } static std::string GetAliasRequiredFeatures(const Record *R, const AsmMatcherInfo &Info) { … } static void emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info, ArrayRef<const Record *> Aliases, unsigned Indent = 0, StringRef AsmParserVariantName = StringRef()) { … } /// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions, /// emit a function for them and return true, otherwise return false. static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info, CodeGenTarget &Target) { … } static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target, const AsmMatcherInfo &Info, StringRef ClassName, const StringToOffsetTable &StringTable, unsigned MaxMnemonicIndex, unsigned MaxFeaturesIndex, bool HasMnemonicFirst, const Record &AsmParser) { … } static void emitAsmTiedOperandConstraints(CodeGenTarget &Target, AsmMatcherInfo &Info, raw_ostream &OS, bool HasOptionalOperands) { … } static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target, unsigned VariantCount) { … } static void emitMnemonicChecker(raw_ostream &OS, CodeGenTarget &Target, unsigned VariantCount, bool HasMnemonicFirst, bool HasMnemonicAliases) { … } // Emit a function mapping match classes to strings, for debugging. 
static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos, raw_ostream &OS) { … } static std::string getNameForFeatureBitset(ArrayRef<const Record *> FeatureBitset) { … } void AsmMatcherEmitter::run(raw_ostream &OS) { … } static TableGen::Emitter::OptClass<AsmMatcherEmitter> X("gen-asm-matcher", "Generate assembly instruction matcher");