// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // pack.h: packing blocks of the LHS and RHS into the data layout // that is expected by compute.h and eventually by kernels. // Because this data layout depends on the kernel format, code here // is templated in KernelLhsFormat/KernelRhsFormat. // // Readers note: an important theme around here is that we try hard // to handle both Lhs and Rhs with a single piece of code. We indifferently // refer to the Lhs and Rhs as a 'Side'. Instead of addressing matrices // by (row, column) indices, we address them by (width, depth), as explained // in kernel.h. This allows us to handle both Lhs and Rhs on an equal footing, // at once. #ifndef GEMMLOWP_INTERNAL_PACK_H_ #define GEMMLOWP_INTERNAL_PACK_H_ #include <cstring> #include "allocator.h" #include "block_params.h" #include "common.h" #include "kernel.h" namespace gemmlowp { // A PackedSideBlock instance is a packed block of either the LHS or RHS // (whence the generic 'Side' name). // // 'Packed' means that it is laid out in the storage order that // is expected by the specified kernel format. From a block of the input // LHS or RHS matrix, one obtains a PackedSideBlock by calling PackLhs() // or PackRhs(). template <typename tKernelSideFormat> class PackedSideBlock { … }; // WidthMajor and DepthMajor are custom phrases modelled after the // standard terminology 'row-major' and 'column-major'. 
// Their meaning
// should be transparent once one has read the explanation in kernel.h:
// for example, in the Lhs, the 'width' dimension is the rows dimension,
// so there WidthMajor means RowMajor, while in the Rhs it is the opposite.
// Another way to put it: WidthMajor means that contiguous storage is used
// for entries having the same 'width' index.
enum class SideMapOrder { … };

// Similar to MatrixMap from map.h, but in terms of width/depth instead of
// rows/columns. Used to address blocks of the input LHS/RHS matrices when
// packing them.
//
// Template parameters:
//   tScalar: NOTE(review): presumably the type of the matrix entries being
//            addressed -- confirm against the class body.
//   tOrder:  the SideMapOrder storage order of the mapped data.
template <typename tScalar, SideMapOrder tOrder>
class SideMap { … };

// A PackingRegisterBlock is a small fixed-size block of a matrix being
// packed. This class is the generic non-optimized implementation; it is
// inherited by the generic implementation of PackingRegisterBlock,
// which may be overridden by template specialization. Overriding it is how
// one may provide optimized packing code paths.
//
// The packing of a block proceeds in two steps:
//   1. Ensuring that we have a complete block of source data, i.e. a block of
//      the compile-time prescribed size. This is where we handle unaligned
//      boundaries: if we don't have a complete block of source data, then
//      we copy and zero-extend it into a local temporary (complete_src_),
//      see MakeCompleteSrc. In the generic case, we do have a complete block,
//      so we just use it in-place, see UseCompleteSrcInPlace.
//   2. Packing a complete block into the destination, see Pack. This is the
//      most critical part, so it's convenient that unaligned boundaries have
//      already been handled in step 1.
template <typename SrcMapType, typename PackedSideBlock>
class PackingRegisterBlockBase { … };

// Generic PackingRegisterBlock: derives from PackingRegisterBlockBase with
// the same template arguments. This is the template to specialize in order
// to provide an optimized packing code path.
template <typename SrcMapType, typename PackedSideBlock>
class PackingRegisterBlock
    : public PackingRegisterBlockBase<SrcMapType, PackedSideBlock> { … };

// Large-scale implementation of packing.
// Template parameters:
//   SrcMapType:      type of the map over the source block.
//                    NOTE(review): presumably a SideMap instantiation --
//                    confirm against the callers.
//   PackedSideBlock: type of the packed destination block.
template <typename SrcMapType, typename PackedSideBlock>
class PackSideBlockImpl { … };

// Packs a block of the input LHS matrix, into a PackedSideBlock.
//
//   dst: destination packed block, passed by pointer.
//   src: map over the source LHS block; taken by const reference.
template <typename PackedSideBlock, typename MatrixMapType>
void PackLhs(PackedSideBlock* dst, const MatrixMapType& src) { … }

// Packs a block of the input RHS matrix, into a PackedSideBlock.
//
//   dst: destination packed block, passed by pointer.
//   src: map over the source RHS block; taken by const reference.
template <typename PackedSideBlock, typename MatrixMapType>
void PackRhs(PackedSideBlock* dst, const MatrixMapType& src) { … }

}  // namespace gemmlowp

// Include at most one architecture-specific packing header, chosen by the
// first GEMMLOWP_* macro below that is defined.
// NOTE(review): these headers presumably supply the optimized
// PackingRegisterBlock specializations -- confirm in the headers themselves.
#ifdef GEMMLOWP_NEON
#include "pack_neon.h"
#elif defined(GEMMLOWP_SSE4)
#include "pack_sse.h"
#elif defined(GEMMLOWP_AVX2)
#include "pack_avx.h"
#elif defined(GEMMLOWP_MSA)
#include "pack_msa.h"
#endif

#endif  // GEMMLOWP_INTERNAL_PACK_H_