#ifndef GEMMLOWP_INTERNAL_UNPACK_H_
#define GEMMLOWP_INTERNAL_UNPACK_H_
#include "allocator.h"
#include "block_params.h"
#include "output.h"
#include "pack.h"
#include <cmath>
namespace gemmlowp {
// PackedResult: class whose definition is elided in this view.
// NOTE(review): presumably the container for the raw accumulator block that
// the unpack stage reads from (cf. the PackedResultType template parameter of
// UnpackResult) -- confirm against the full class body before relying on this.
class PackedResult { … };
// MatrixBlockBounds: plain struct whose members are elided in this view.
// UnpackResult() receives one by const reference as `dst_block`.
// NOTE(review): presumably describes the position and extent of a rectangular
// block inside the destination matrix -- confirm the actual fields.
struct MatrixBlockBounds { … };
// PrefetchResultBlock<Rows, Cols>(src, lhs_sums_of_each_slice, src_row, src_col)
//
// Template parameters:
//   Rows, Cols  - compile-time integer dimensions.
//   SrcMapType  - type of the source map passed as `src`.
// Parameters:
//   src                     - source map, taken by const reference.
//   lhs_sums_of_each_slice  - column-shaped VectorMap over const int32 values.
//   src_row, src_col        - integer coordinates into `src`.
// Returns nothing.
//
// NOTE(review): body not visible here. Presumably issues prefetch hints for
// the Rows x Cols region of `src` at (src_row, src_col) and for the matching
// LHS sums -- TODO confirm against the implementation.
template <int Rows, int Cols, typename SrcMapType>
void PrefetchResultBlock(const SrcMapType& src,
const VectorMap<const std::int32_t, VectorShape::Col>&
lhs_sums_of_each_slice,
int src_row, int src_col) { … }
// UnpackResultBlock<KernelFormat, RegisterBlockType>(...)
//
// Template parameters:
//   KernelFormat, RegisterBlockType - explicitly supplied by the caller (they
//       do not appear in the parameter list, so they cannot be deduced).
//   SrcMapType, LhsOffset, RhsOffset, OutputPipelineExecutorType, DstType -
//       types of the corresponding function arguments.
// Parameters:
//   src                     - source map, read-only.
//   executor                - output pipeline executor, read-only.
//   dst                     - pointer to the destination object (non-const).
//   lhs_sums_of_each_slice  - column-shaped VectorMap over const int32 values.
//   rhs_sums_of_each_slice  - row-shaped VectorMap over const int32 values.
//   lhs_offset, rhs_offset  - offset objects, read-only.
//   depth                   - integer depth value.
//   src_row, src_col        - integer coordinates, presumably local to `src`.
//   src_global_row, src_global_col - integer coordinates; presumably the same
//       position expressed in whole-matrix coordinates -- TODO confirm.
//   dst_row, dst_col        - integer coordinates, presumably into `*dst`.
// Returns nothing.
//
// NOTE(review): body not visible here. Presumably loads one register-sized
// block from `src`, applies the offset/sum correction terms, runs `executor`
// on it and stores the result into `dst` -- confirm against the implementation.
template <typename KernelFormat, typename RegisterBlockType,
typename SrcMapType, typename LhsOffset, typename RhsOffset,
typename OutputPipelineExecutorType, typename DstType>
void UnpackResultBlock(const SrcMapType& src,
const OutputPipelineExecutorType& executor, DstType* dst,
const VectorMap<const std::int32_t, VectorShape::Col>&
lhs_sums_of_each_slice,
const VectorMap<const std::int32_t, VectorShape::Row>&
rhs_sums_of_each_slice,
const LhsOffset& lhs_offset, const RhsOffset& rhs_offset,
int depth, int src_row, int src_col, int src_global_row,
int src_global_col, int dst_row, int dst_col) { … }
// UnpackResult<KernelFormat>(dst, dst_block, src, depth, ...)
//
// Template parameters:
//   KernelFormat - explicitly supplied by the caller (not deducible from the
//       argument list).
//   ResultBlockType, PackedResultType, LhsOffset, RhsOffset,
//   OutputPipelineType - types of the corresponding function arguments.
// Parameters:
//   dst                         - pointer to the destination block (non-const).
//   dst_block                   - bounds of the destination block, read-only.
//   src                         - packed result to read from, read-only.
//   depth                       - integer depth value.
//   lhs_sums_of_each_slice_ptr  - pointer to const int32 values.
//   rhs_sums_of_each_slice_ptr  - pointer to const int32 values.
//   lhs_offset, rhs_offset      - offset objects, read-only.
//   output_pipeline             - output pipeline description, read-only.
// Returns nothing.
//
// NOTE(review): body not visible here. Presumably wraps the raw sum pointers
// in VectorMaps, walks the area described by `dst_block`, and delegates each
// sub-block to UnpackResultBlock() -- confirm against the implementation.
// The required lengths of the two sum arrays are not visible from this
// signature; verify against the caller.
template <typename KernelFormat, typename ResultBlockType,
typename PackedResultType, typename LhsOffset, typename RhsOffset,
typename OutputPipelineType>
void UnpackResult(ResultBlockType* dst, const MatrixBlockBounds& dst_block,
const PackedResultType& src, int depth,
const std::int32_t* lhs_sums_of_each_slice_ptr,
const std::int32_t* rhs_sums_of_each_slice_ptr,
const LhsOffset& lhs_offset, const RhsOffset& rhs_offset,
const OutputPipelineType& output_pipeline) { … }
}
#endif