llvm/mlir/tools/mlir-tblgen/FormatGen.h

//===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains common classes for building custom assembly format parsers
// and generators.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
#define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_

#include "mlir/Support/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SMLoc.h"
#include <vector>

namespace llvm {
class SourceMgr;
} // namespace llvm

namespace mlir {
namespace tblgen {

//===----------------------------------------------------------------------===//
// FormatToken
//===----------------------------------------------------------------------===//

/// This class represents a specific token in the input format, i.e. one lexical
/// unit of the assembly format being processed.
class FormatToken {};

//===----------------------------------------------------------------------===//
// FormatLexer
//===----------------------------------------------------------------------===//

/// This class implements a simple lexer for operation assembly format strings.
///
/// NOTE(review): this file is described as holding classes common to custom
/// assembly formats in general, so "operation" may be narrower than intended
/// here -- confirm.
class FormatLexer {};

//===----------------------------------------------------------------------===//
// FormatElement
//===----------------------------------------------------------------------===//

/// This class represents a single format element. It is the root of the element
/// hierarchy: `FormatElementBase` derives from it, and the concrete element
/// classes derive from `FormatElementBase`. Its nested `Kind` type is what
/// `FormatElementBase` is parameterized on.
///
/// If you squint and take a close look, you can see the outline of a `Format`
/// dialect.
class FormatElement {};

/// Base class template for the concrete format element classes. This class
/// implements common methods for LLVM-style RTTI.
///
/// `ElementKind` is the `FormatElement::Kind` value supplied by the deriving
/// class, e.g. `FormatElement::Literal` for `LiteralElement`.
template <FormatElement::Kind ElementKind>
class FormatElementBase : public FormatElement {};

/// This class represents a literal element. A literal is either one of the
/// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
/// `literal`). Its element kind is `FormatElement::Literal`.
class LiteralElement : public FormatElementBase<FormatElement::Literal> {};

/// This class represents a raw string that can contain arbitrary C++ code. Its
/// element kind is `FormatElement::String`.
class StringElement : public FormatElementBase<FormatElement::String> {};

/// This class represents a variable element. A variable refers to some part of
/// the object being parsed, e.g. an attribute or operand on an operation or a
/// parameter on an attribute. Its element kind is `FormatElement::Variable`;
/// `VariableElementBase` derives from it and is parameterized on
/// `VariableElement::Kind`.
class VariableElement : public FormatElementBase<FormatElement::Variable> {};

/// Base class for variable elements. This class implements common methods for
/// LLVM-style RTTI.
///
/// `VariableKind` is the `VariableElement::Kind` value supplied by the deriving
/// class.
template <VariableElement::Kind VariableKind>
class VariableElementBase : public VariableElement {};

/// This class represents a whitespace element, e.g. a newline or space. It is a
/// literal that is printed but never parsed. When the value is empty, i.e. ``,
/// the space that would otherwise have been printed automatically is elided.
class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> {};

/// This class represents a directive element. Its element kind is
/// `FormatElement::Directive`. Specific directives derive from
/// `DirectiveElementBase`, which is parameterized on `DirectiveElement::Kind`
/// (e.g. `CustomDirective` and `RefDirective`).
class DirectiveElement : public FormatElementBase<FormatElement::Directive> {};

/// Base class for directive elements. This class implements common methods for
/// LLVM-style RTTI.
///
/// `DirectiveKind` is the `DirectiveElement::Kind` value supplied by the
/// deriving class, e.g. `DirectiveElement::Custom` for `CustomDirective` and
/// `DirectiveElement::Ref` for `RefDirective`.
template <DirectiveElement::Kind DirectiveKind>
class DirectiveElementBase : public DirectiveElement {};

/// This class represents a custom format directive that is implemented by the
/// user in C++. The directive accepts a list of arguments that is passed to the
/// C++ function. Its directive kind is `DirectiveElement::Custom`.
class CustomDirective : public DirectiveElementBase<DirectiveElement::Custom> {};

/// This class represents a reference directive. This directive can be used to
/// reference, but not bind, a previously bound variable or format object. Its
/// only current use is to pass variables as arguments to the custom directive.
/// Its directive kind is `DirectiveElement::Ref`.
class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> {};

/// This class represents an optional group. It holds two groups of elements:
/// one that is optionally emitted based on an optional variable "anchor", and
/// one that is emitted when the anchor element is not present. Its element kind
/// is `FormatElement::Optional`.
class OptionalElement : public FormatElementBase<FormatElement::Optional> {};

//===----------------------------------------------------------------------===//
// FormatParser
//===----------------------------------------------------------------------===//

/// Base class for a parser that implements an assembly format. This class
/// defines a common assembly format syntax and handles the creation of format
/// elements. Subclasses need to implement parsing for the format elements they
/// support.
class FormatParser {};

//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//

/// Whether a space needs to be emitted before a literal. E.g., two keywords
/// back-to-back require a space separator, but a keyword followed by '<' does
/// not require a space.
///
/// `value` is the literal in question. `lastWasPunctuation` presumably tells
/// whether the previously emitted element was punctuation -- confirm against
/// the definition.
bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);

/// Returns true if the given string can be formatted as a keyword.
/// `emitError` is optional and defaults to null.
/// NOTE(review): presumably, as with `isValidLiteral`, `emitError` is invoked
/// with the reason for the failure when provided -- confirm against the
/// definition.
bool canFormatStringAsKeyword(StringRef value,
                              function_ref<void(Twine)> emitError = nullptr);

/// Returns true if the given string is a valid format literal element.
/// `emitError` is optional and defaults to null; if it is provided, it is
/// invoked with the reason for the failure.
bool isValidLiteral(StringRef value,
                    function_ref<void(Twine)> emitError = nullptr);

/// Whether a failure in parsing the assembly format should be a fatal error.
/// This is a boolean command-line option (`llvm::cl::opt<bool>`); only the
/// `extern` declaration appears here, the definition is provided elsewhere.
extern llvm::cl::opt<bool> formatErrorIsFatal;

} // namespace tblgen
} // namespace mlir

#endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_