llvm/libc/benchmarks/automemcpy/include/automemcpy/FunctionDescriptor.h

//===-- Pod structs to describe a memory function----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H
#define LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H

#include <climits>
#include <cstddef>
#include <llvm/ADT/ArrayRef.h>
#include <llvm/ADT/Hashing.h>
#include <llvm/ADT/StringRef.h>
#include <optional>
#include <tuple>

namespace llvm {
namespace automemcpy {

// Boilerplate code to be able to sort and hash types.
#define COMPARABLE_AND_HASHABLE(T, ...)                                        \
  inline auto asTuple() const { return std::tie(__VA_ARGS__); }                \
  bool operator==(const T &O) const { return asTuple() == O.asTuple(); }       \
  bool operator<(const T &O) const { return asTuple() < O.asTuple(); }         \
  struct Hasher {                                                              \
    std::size_t operator()(const T &K) const {                                 \
      return llvm::hash_value(K.asTuple());                                    \
    }                                                                          \
  };

// Represents the maximum value for the size parameter of a memory function.
// This is an `int` so we can use it as an expression in Z3.
// It also allows for a more readable and compact representation when storing
// the SizeSpan in the autogenerated C++ file.
static constexpr int kMaxSize = INT_MAX;

// This mimics the `Arg` type in libc/src/string/memory_utils/elements.h without
// having to depend on it.
enum class AlignArg { _1, _2, ARRAY_SIZE };

// Describes a range of sizes.
// We use the begin/end representation instead of first/last to allow for empty
// range (i.e. Begin == End)
struct SizeSpan {
  size_t Begin = 0;
  size_t End = 0;

  COMPARABLE_AND_HASHABLE(SizeSpan, Begin, End)
};

// Describes a contiguous region.
// In such a region all sizes are handled individually.
// e.g. with Span = {0, 2};
// if(size == 0) return Handle<0>();
// if(size == 1) return Handle<1>();
struct Contiguous {
  SizeSpan Span;

  COMPARABLE_AND_HASHABLE(Contiguous, Span)
};

// This struct represents a range of sizes over which to use an overlapping
// strategy. An overlapping strategy of size N handles all sizes from N to 2xN.
// The span may represent several contiguous overlaps.
// e.g. with Span = {16, 128};
// if(size >= 16 and size < 32) return Handle<Overlap<16>>();
// if(size >= 32 and size < 64) return Handle<Overlap<32>>();
// if(size >= 64 and size < 128) return Handle<Overlap<64>>();
struct Overlap {
  SizeSpan Span;

  COMPARABLE_AND_HASHABLE(Overlap, Span)
};

// Describes a region using a loop handling BlockSize bytes at a time. The
// remaining bytes of the loop are handled with an overlapping operation.
struct Loop {
  SizeSpan Span;
  size_t BlockSize = 0;

  COMPARABLE_AND_HASHABLE(Loop, Span, BlockSize)
};

// Same as `Loop` but starts by aligning a buffer on `Alignment` bytes.
// A first operation handling 'Alignment` bytes is performed followed by a
// sequence of Loop.BlockSize bytes operation. The Loop starts processing from
// the next aligned byte in the chosen buffer. The remaining bytes of the loop
// are handled with an overlapping operation.
struct AlignedLoop {
  Loop Loop;
  size_t Alignment = 0;            // Size of the alignment.
  AlignArg AlignTo = AlignArg::_1; // Which buffer to align.

  COMPARABLE_AND_HASHABLE(AlignedLoop, Loop, Alignment, AlignTo)
};

// Some processors offer special instruction to handle the memory function
// completely, we refer to such instructions as accelerators.
struct Accelerator {
  SizeSpan Span;

  COMPARABLE_AND_HASHABLE(Accelerator, Span)
};

// The memory functions are assembled out of primitives that can be implemented
// with regular scalar operations (SCALAR), with the help of vector or bitcount
// instructions (NATIVE) or by deferring it to the compiler (BUILTIN).
enum class ElementTypeClass {
  SCALAR,
  NATIVE,
  BUILTIN,
};

// A simple enum to categorize which function is being implemented.
enum class FunctionType {
  MEMCPY,
  MEMCMP,
  BCMP,
  MEMSET,
  BZERO,
};

// This struct describes the skeleton of the implementation, it does not go into
// every detail but is enough to uniquely identify the implementation.
struct FunctionDescriptor {
  FunctionType Type;
  std::optional<Contiguous> Contiguous;
  std::optional<Overlap> Overlap;
  std::optional<Loop> Loop;
  std::optional<AlignedLoop> AlignedLoop;
  std::optional<Accelerator> Accelerator;
  ElementTypeClass ElementClass;

  COMPARABLE_AND_HASHABLE(FunctionDescriptor, Type, Contiguous, Overlap, Loop,
                          AlignedLoop, Accelerator, ElementClass)

  inline size_t id() const { return llvm::hash_value(asTuple()); }
};

// Same as above but with the function name.
struct NamedFunctionDescriptor {
  StringRef Name;
  FunctionDescriptor Desc;
};

template <typename T> llvm::hash_code hash_value(const ArrayRef<T> &V) {
  return llvm::hash_combine_range(V.begin(), V.end());
}
template <typename T> llvm::hash_code hash_value(const T &O) {
  return llvm::hash_value(O.asTuple());
}

} // namespace automemcpy
} // namespace llvm

#endif /* LLVM_LIBC_BENCHMARKS_AUTOMEMCPY_COMMON_H */