/* Copyright 2019 Google LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ // The 'middle-end' in ruy. See TrMul function comment. #include "ruy/trmul.h" #include <algorithm> #include <atomic> #include <cstdint> #include <cstring> #include <limits> #include <memory> #include <vector> #include "ruy/allocator.h" #include "ruy/block_map.h" #include "ruy/check_macros.h" #include "ruy/cpu_cache_params.h" #include "ruy/cpuinfo.h" #include "ruy/ctx.h" #include "ruy/denormal.h" #include "ruy/mat.h" #include "ruy/matrix.h" #include "ruy/mul_params.h" #include "ruy/strategy_controls.h" #include "ruy/opt_set.h" #include "ruy/profiler/instrumentation.h" #include "ruy/side_pair.h" #include "ruy/size_util.h" #include "ruy/thread_pool.h" #include "ruy/trace.h" #include "ruy/tune.h" namespace ruy { namespace { // Enum to track the packing status of a block of the LHS or RHS matrix. enum class PackingStatus : std::uint8_t { … }; // TrMulTask is the task that a ruy thread runs to perform the TrMul operation. class TrMulTask final : public Task { … }; int GetTentativeThreadCount(Ctx* ctx, int rows, int cols, int depth) { … } bool GetUseSimpleLoop(int tentative_thread_count, int rows, int cols, int depth, int lhs_scalar_size, int rhs_scalar_size, const CpuCacheParams& cpu_cache_params) { … } } // namespace // TrMul is the ruy middle-end.
It contains the high-level logic to perform // a ruy::Mul's work, down to calls to back-end Kernel and Pack functions. // This includes determining how many threads to use, computing the BlockMap, // and executing tasks on a thread-pool. The TrMul function itself runs on the main // thread, the code that is potentially running on worker threads is in // TrMulTask::Run(). void TrMul(Ctx* ctx, TrMulParams* params) { … } } // namespace ruy