// Copyright (c) 2018 Google LLC. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "source/opt/loop_unroller.h" #include <limits> #include <memory> #include <unordered_map> #include <utility> #include <vector> #include "source/opt/ir_builder.h" #include "source/opt/loop_utils.h" // Implements loop util unrolling functionality for fully and partially // unrolling loops. Given a factor it will duplicate the loop that many times, // appending each one to the end of the old loop and removing backedges, to // create a new unrolled loop. // // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to // validate that a given loop can be unrolled. That method (along with the // constructor of loop) checks that the IR is in the expected canonicalised // format. // // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually // perform the unrolling. This implements helper methods to copy the loop basic // blocks and remap the ids of instructions used inside them. // // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method // actually performs the loop duplication. It does this by creating a // LoopUnrollState object and then copying the loop as given by the factor // parameter. The LoopUnrollState object retains the state of the unroller // between the loop body copies as each iteration needs information on the last // to adjust the phi induction variable, adjust the OpLoopMerge instruction in // the main loop header, and change the previous continue block to point to the // new header and the new continue block to the main loop header. // // 4 - If the loop is to be fully unrolled then it is simply closed after step // 3, with the OpLoopMerge being deleted, the backedge removed, and the // condition blocks folded. // // 5 - If it is being partially unrolled: if the unrolling factor leaves the // loop with an even number of bodies with respect to the number of loop // iterations then step 3 is all that is needed. If it is uneven then we need to // duplicate the loop completely and unroll the duplicated loop to cover the // residual part and adjust the first loop to cover only the "even" part. For // instance if you request an unroll factor of 3 on a loop with 10 iterations // then copying the body three times would leave you with three bodies in the // loop // where the loop still iterates over each 4 times. So we make two loops one // iterating once then a second loop of three iterating 3 times. namespace spvtools { namespace opt { namespace { // Loop control constant value for DontUnroll flag. constexpr uint32_t kLoopControlDontUnrollIndex = …; // Operand index of the loop control parameter of the OpLoopMerge. constexpr uint32_t kLoopControlIndex = …; // This utility class encapsulates some of the state we need to maintain between // loop unrolls. Specifically it maintains key blocks and the induction variable // in the current loop duplication step and the blocks from the previous one. // This is because each step of the unroll needs to use data from both the // preceding step and the original loop. struct LoopUnrollState { … }; // This class implements the actual unrolling. It uses a LoopUnrollState to // maintain the state of the unrolling in between steps. class LoopUnrollerUtilsImpl { … }; /* * Static helper functions. */ // Retrieve the index of the OpPhi instruction |phi| which corresponds to the // incoming |block| id. uint32_t GetPhiIndexFromLabel(const BasicBlock* block, const Instruction* phi) { … } void LoopUnrollerUtilsImpl::Init(Loop* loop) { … } // This function is used to partially unroll the loop when the factor provided // would normally lead to an illegal optimization. Instead of just unrolling the // loop it creates two loops and unrolls one and adjusts the condition on the // other. The end result being that the new loop pair iterates over the correct // number of bodies. void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop, size_t factor) { … } // Mark this loop as DontUnroll as it will already be unrolled and it may not // be safe to unroll a previously partially unrolled loop. void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const { … } // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop // backedge intact. This will leave the loop with |factor| number of bodies // after accounting for the initial body. void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) { … } void LoopUnrollerUtilsImpl::RemoveDeadInstructions() { … } void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) { … } // Fully unroll the loop by partially unrolling it by the number of loop // iterations minus one for the body already accounted for. void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) { … } void LoopUnrollerUtilsImpl::KillDebugDeclares(BasicBlock* bb) { … } // Copy a given basic block, give it a new result_id, and store the new block // and the id mapping in the state. |preserve_instructions| is used to determine // whether or not this function should edit instructions other than the // |result_id|. void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr, bool preserve_instructions) { … } void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) { … } uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi, uint32_t label) const { … } void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block, uint32_t operand_label) { … } void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) { … } // Uses the first loop to create a copy of the loop with new IDs. void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) { … } // Whenever the utility copies a block it stores it in a temporary buffer, this // function adds the buffer into the Function. The blocks will be inserted // after the block |insert_point|. void LoopUnrollerUtilsImpl::AddBlocksToFunction( const BasicBlock* insert_point) { … } // Assign all result_ids in |basic_block| instructions to new IDs and preserve // the mapping of new ids to old ones. void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) { … } void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) { … } void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) { … } // Generate the ordered list of basic blocks in the |loop| and cache it for // later use. void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) { … } // Adds the blocks_to_add_ to both the loop and to the parent. void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const { … } void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const { … } // Duplicate the |loop| body |factor| number of times while keeping the loop // backedge intact. void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) { … } /* * End LoopUtilsImpl. */ } // namespace /* * * Begin Utils. * * */ bool LoopUtils::CanPerformUnroll() { … } bool LoopUtils::PartiallyUnroll(size_t factor) { … } bool LoopUtils::FullyUnroll() { … } void LoopUtils::Finalize() { … } /* * * Begin Pass. * */ Pass::Status LoopUnroller::Process() { … } } // namespace opt } // namespace spvtools