#include "ruy/block_map.h"
#include <algorithm>
#include <cstdint>
#include <limits>
#ifdef RUY_MAKEBLOCKMAP_DEBUG
#include <cstdio>
#include <cstdlib>
#include <string>
#endif
#include "ruy/check_macros.h"
#include "ruy/opt_set.h"
#include "ruy/profiler/instrumentation.h"
#include "ruy/size_util.h"
#include "ruy/trace.h"
namespace ruy {
namespace {
// File-local helper (anonymous namespace). Takes a log2 size and an unsigned
// 32-bit `square_index`, and has a non-const `local_pos` pointer through which
// a result can be written.
// NOTE(review): the body is not visible in this view. The name suggests it
// maps an index within a square of blocks to a (rows, cols) position using a
// plain linear ordering -- confirm against the implementation.
void DecodeTraversalLinear(int size_log2, std::uint32_t square_index,
SidePair<int>* local_pos) { … }
// File-local helper (anonymous namespace). Takes an unsigned 32-bit
// `square_index` and a non-const `local_pos` pointer through which a result can
// be written. Unlike DecodeTraversalLinear, it takes no size parameter.
// NOTE(review): the body is not visible in this view. The name suggests a
// Z-order (Morton-like) fractal decoding of the index -- confirm against the
// implementation.
void DecodeTraversalFractalZ(std::uint32_t square_index,
SidePair<int>* local_pos) { … }
// File-local helper (anonymous namespace). Same signature shape as
// DecodeTraversalFractalZ: an unsigned 32-bit `square_index` in, and a
// non-const `local_pos` pointer through which a result can be written.
// NOTE(review): the body is not visible in this view. The name suggests a
// "U"-shaped fractal ordering variant -- confirm how it differs from the Z
// variant against the implementation.
void DecodeTraversalFractalU(std::uint32_t square_index,
SidePair<int>* local_pos) { … }
// File-local helper (anonymous namespace). Takes a log2 size and an unsigned
// 32-bit `square_index`, and has a non-const `local_pos` pointer through which
// a result can be written.
// NOTE(review): the body is not visible in this view. The name suggests a
// Hilbert-curve decoding of the index, with `size_log2` presumably giving the
// curve order -- confirm against the implementation.
void DecodeTraversalFractalHilbert(int size_log2, std::uint32_t square_index,
SidePair<int>* local_pos) { … }
}
// Public (namespace ruy) entry point. Reads a BlockMap (taken by const
// reference) and an integer `index`, and has a non-const `block` pointer
// through which a per-side pair of ints can be written.
// NOTE(review): the body is not visible in this view. Presumably this converts
// a flat block index into per-side block coordinates, dispatching to one of
// the DecodeTraversal* helpers above -- confirm against the implementation.
void GetBlockByIndex(const BlockMap& block_map, int index,
SidePair<int>* block) { … }
namespace {
// File-local helper (anonymous namespace). Returns a BlockMapTraversalOrder
// value computed from the row/column counts "after rectangularness division",
// the depth, the LHS/RHS scalar sizes and the CPU cache parameters.
// NOTE(review): the body is not visible in this view. The selection criteria
// (e.g. any comparison of working-set size against cache sizes) and the units
// of the scalar sizes are not shown here -- confirm against the
// implementation.
BlockMapTraversalOrder GetTraversalOrder(
int rows_after_rectangularness_division,
int cols_after_rectangularness_division, int depth, int lhs_scalar_size,
int rhs_scalar_size, const CpuCacheParams& cpu_cache_params) { … }
// File-local helper (anonymous namespace). Takes two ints and returns an int.
// NOTE(review): the body is not visible in this view. The name suggests it
// returns floor(log2(num / denom)); the behavior for num < denom or for
// non-positive arguments is not shown here -- confirm against the
// implementation.
int floor_log2_quotient(int num, int denom) { … }
// File-local helper (anonymous namespace). Takes the matrix dimensions
// (`rows`, `cols`) and the kernel dimensions (`kernel_rows`, `kernel_cols`),
// and has two non-const int pointers, `rows_rectangularness_log2` and
// `cols_rectangularness_log2`, through which results can be written.
// NOTE(review): the body is not visible in this view. Presumably the outputs
// are log2 measures of how much taller-than-wide or wider-than-tall the shape
// is -- confirm the exact definition, and whether both outputs are always
// written, against the implementation.
void GetRectangularness(int rows, int cols, int kernel_rows, int kernel_cols,
int* rows_rectangularness_log2,
int* cols_rectangularness_log2) { … }
// File-local helper (anonymous namespace). Returns an int score computed from
// a candidate `block_size_log2`, the matrix dimensions (`rows`, `cols`) and
// `tentative_thread_count`.
// NOTE(review): the body is not visible in this view. The score's scale and
// sign convention (presumably higher is better) are not shown here -- confirm
// against the implementation.
int GetMultithreadingScore(int block_size_log2, int rows, int cols,
int tentative_thread_count) { … }
// File-local helper (anonymous namespace). Returns an int score computed from
// a candidate `block_size_log2`, the matrix dimensions (`rows`, `cols`,
// `depth`), the log2 kernel dimensions, the LHS/RHS scalar sizes and the CPU
// cache parameters.
// NOTE(review): the body is not visible in this view. How the block working
// set is compared against the cache parameters, and the score's scale, are
// not shown here -- confirm against the implementation.
int GetCacheLocalityScore(int block_size_log2, int rows, int cols, int depth,
int kernel_rows_log2, int kernel_cols_log2,
int lhs_scalar_size, int rhs_scalar_size,
const CpuCacheParams& cpu_cache_params) { … }
// File-local helper (anonymous namespace). Returns an int score computed from
// a candidate `block_size_log2`, the matrix dimensions (`rows`, `cols`) and
// the log2 kernel dimensions.
// NOTE(review): the body is not visible in this view. Presumably it rates how
// well per-kernel-call overhead is amortized at this block size -- confirm the
// definition and scale against the implementation.
int GetKernelAmortizationScore(int block_size_log2, int rows, int cols,
int kernel_rows_log2, int kernel_cols_log2) { … }
}
// Public (namespace ruy) predicate. Returns a bool computed from the matrix
// dimensions (`rows`, `cols`, `depth`), the LHS/RHS scalar sizes and the CPU
// cache parameters.
// NOTE(review): the body is not visible in this view. The name suggests it
// reports whether a linear block traversal would certainly be chosen for this
// shape, presumably by reusing GetTraversalOrder above -- confirm against the
// implementation.
bool IsObviouslyLinearTraversal(int rows, int cols, int depth,
int lhs_scalar_size, int rhs_scalar_size,
const CpuCacheParams& cpu_cache_params) { … }
// Public (namespace ruy) entry point. Takes the matrix dimensions (`rows`,
// `cols`, `depth`), the kernel dimensions, the LHS/RHS scalar sizes, a
// tentative thread count and the CPU cache parameters, and has a non-const
// `block_map` pointer through which the resulting BlockMap can be written.
// NOTE(review): the body is not visible in this view. Presumably it picks a
// block size using the Get*Score helpers above and fills in every field of
// `*block_map`; preconditions on the arguments (e.g. dimensions being
// multiples of the kernel dimensions) are not shown here -- confirm against
// the implementation.
void MakeBlockMap(int rows, int cols, int depth, int kernel_rows,
int kernel_cols, int lhs_scalar_size, int rhs_scalar_size,
int tentative_thread_count,
const CpuCacheParams& cpu_cache_params, BlockMap* block_map) { … }
// Single-side overload. Takes a `side`, a BlockMap (by const reference) and an
// integer `block`, and has two non-const int pointers, `start` and `end`,
// through which results can be written.
// NOTE(review): the body is not visible in this view. Presumably `*start` and
// `*end` receive the matrix-coordinate range covered by block number `block`
// along `side`; whether `end` is inclusive or exclusive is not shown here --
// confirm against the implementation.
void GetBlockMatrixCoords(Side side, const BlockMap& block_map, int block,
int* start, int* end) { … }
// Both-sides overload. Takes a BlockMap and a per-side pair of block numbers
// (both by const reference), and has two non-const SidePair<int> pointers,
// `start` and `end`, through which results can be written.
// NOTE(review): the body is not visible in this view. Presumably it applies
// the single-side overload once per side -- confirm against the
// implementation.
void GetBlockMatrixCoords(const BlockMap& block_map, const SidePair<int>& block,
SidePair<int>* start, SidePair<int>* end) { … }
}