// Copyright 2022 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/compiler/string-builder-optimizer.h" #include <algorithm> #include <optional> #include "src/base/bits.h" #include "src/base/logging.h" #include "src/base/small-vector.h" #include "src/compiler/access-builder.h" #include "src/compiler/graph-assembler.h" #include "src/compiler/js-graph.h" #include "src/compiler/js-heap-broker.h" #include "src/compiler/js-operator.h" #include "src/compiler/node-matchers.h" #include "src/compiler/node-properties.h" #include "src/compiler/node.h" #include "src/compiler/opcodes.h" #include "src/compiler/operator.h" #include "src/compiler/schedule.h" #include "src/compiler/types.h" #include "src/objects/code.h" #include "src/objects/map-inl.h" #include "src/utils/utils.h" #include "src/zone/zone-containers.h" namespace v8 { namespace internal { namespace compiler { namespace { // Returns true if {node} is a kStringConcat or a kNewConsString. bool IsConcat(Node* node) { … } // Returns true if {node} is considered as a literal string by the string // builder optimizer: // - it's a literal string // - or it's a kStringFromSingleCharCode bool IsLiteralString(Node* node, JSHeapBroker* broker) { … } // Returns true if {node} has at least one concatenation or phi in its uses. bool HasConcatOrPhiUse(Node* node) { … } } // namespace OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::ConcatResultIsOneOrTwoByte( State a, State b) { … } std::optional<std::pair<int64_t, int64_t>> OneOrTwoByteAnalysis::TryGetRange( Node* node) { … } // Tries to determine whether {node} is a 1-byte or a 2-byte string. This // function assumes that {node} is part of a string builder: if it's a // concatenation and its left hand-side is something else than a literal string, // it returns only whether the right hand-side is 1/2-byte: the String builder // analysis will take care of propagating the state of the left hand-side. OneOrTwoByteAnalysis::State OneOrTwoByteAnalysis::OneOrTwoByte(Node* node) { … } bool StringBuilderOptimizer::BlockShouldFinalizeStringBuilders( BasicBlock* block) { … } ZoneVector<Node*> StringBuilderOptimizer::GetStringBuildersToFinalize( BasicBlock* block) { … } OneOrTwoByteAnalysis::State StringBuilderOptimizer::GetOneOrTwoByte( Node* node) { … } bool StringBuilderOptimizer::IsStringBuilderEnd(Node* node) { … } bool StringBuilderOptimizer::IsNonLoopPhiStringBuilderEnd(Node* node) { … } bool StringBuilderOptimizer::IsStringBuilderConcatInput(Node* node) { … } bool StringBuilderOptimizer::ConcatIsInStringBuilder(Node* node) { … } int StringBuilderOptimizer::GetStringBuilderIdForConcat(Node* node) { … } bool StringBuilderOptimizer::IsFirstConcatInStringBuilder(Node* node) { … } // Duplicates the {input_idx}th input of {node} if it has multiple uses, so that // the replacement only has one use and can safely be marked as // State::kConfirmedInStringBuilder and properly optimized in // EffectControlLinearizer (in particular, this will allow to safely remove // StringFromSingleCharCode that are only used for a StringConcat that we // optimize). void StringBuilderOptimizer::ReplaceConcatInputIfNeeded(Node* node, int input_idx) { … } // If all of the predecessors of {node} are part of a string builder and have // the same id, returns this id. Otherwise, returns kInvalidId. int StringBuilderOptimizer::GetPhiPredecessorsCommonId(Node* node) { … } namespace { // Returns true if {first} comes before {second} in {block}. bool ComesBeforeInBlock(Node* first, Node* second, BasicBlock* block) { … } static constexpr int kMaxPredecessors = …; // Compute up to {kMaxPredecessors} predecessors of {start} that are not past // {end}, and store them in {dst}. Returns true if there are less than // {kMaxPredecessors} such predecessors and false otherwise. bool ComputePredecessors( BasicBlock* start, BasicBlock* end, base::SmallVector<BasicBlock*, kMaxPredecessors>* dst) { … } // Returns false if {node} makes its string input escape this use. For instance, // a Phi or a Store make their input escape, but a kStringLength consumes its // inputs. bool OpcodeIsAllowed(IrOpcode::Value op) { … } // Returns true if {sb_child_block} can be a valid successor for // {previous_block} in the string builder, considering that {other_child_block} // is another successor of {previous_block} (which uses the string builder that // is in {previous_block}).We are mainly checking for the following scenario: // // | // v // +---> LoopPhi // | | // | v // | node ----------> other_child // | | // | v // | child // | ... // | | // +-------+ // // Where {node} and {child} are inside a loop (and could be part of a string // builder), but {other_child} is not, and the control flow doesn't exit the // loop in between {node} and {child}. The string builder should not be used in // such situations, because by the time {other_child} is reached, its input will // be invalid, because {child} will have mutated it. (here, node's block would // be {previous_block}, child's would be {sb_child_block} and other_child's // would be {other_child_block}). bool ValidControlFlowForStringBuilder(BasicBlock* sb_child_block, BasicBlock* other_child_block, BasicBlock* previous_block, ZoneVector<BasicBlock*> loop_headers) { … } // Return true if {maybe_dominator} dominates {maybe_dominee} and is less than // {kMaxDominatorSteps} steps away (to avoid going back too far if // {maybe_dominee} is much deeper in the graph that {maybe_dominator}). bool IsClosebyDominator(BasicBlock* maybe_dominator, BasicBlock* maybe_dominee) { … } // Returns true if {node} is a Phi that has both {input1} and {input2} as // inputs. bool IsPhiContainingGivenInputs(Node* node, Node* input1, Node* input2, Schedule* schedule) { … } // Returns true if {phi} has 3 inputs (including the Loop or Merge), and its // first two inputs are either Phi themselves, or StringConcat/NewConsString. // This is used to quickly eliminate Phi nodes that cannot be part of a String // Builder. bool PhiInputsAreConcatsOrPhi(Node* phi) { … } } // namespace // Check that the uses of {node} are valid, assuming that {string_builder_child} // is the following node in the string builder. In a nutshell, for uses of a // node (that is part of the string builder) to be valid, they need to all // appear before the next node of the string builder (because after, the node is // not valid anymore because we mutate SlicedString and the backing store in // place). For instance: // // s1 = "123" + "abc"; // s2 = s1 + "def"; // l = s1.length(); // // In this snippet, if `s1` and `s2` are part of the string builder, then the // uses of `s1` are not actually valid, because `s1.length()` appears after the // next node of the string builder (`s2`) has been computed. bool StringBuilderOptimizer::CheckNodeUses(Node* node, Node* string_builder_child, Status status) { … } // Check that the uses of the predecessor(s) of {child} in the string builder // are valid, with respect to {child}. This sounds a bit backwards, but we can't // check if uses are valid before having computed what the next node in the // string builder is. Hence, once we've established that {child} is in the // string builder, we check that the uses of the previous node(s) of the // string builder are valid. For non-loop phis (ie, merge phis), we simply check // that the uses of their 2 predecessors are valid. For loop phis, this function // is called twice: one for the outside-the-loop input (with {input_if_loop_phi} // = 0), and once for the inside-the-loop input (with {input_if_loop_phi} = 1). bool StringBuilderOptimizer::CheckPreviousNodeUses(Node* child, Status status, int input_if_loop_phi) { … } void StringBuilderOptimizer::VisitNode(Node* node, BasicBlock* block) { … } // For each potential string builder, checks that their beginning has status // kBeginStringBuilder, and that they contain at least one phi. Then, all of // their "valid" nodes are switched from status State::InStringBuilder to status // State::kConfirmedInStringBuilder (and "valid" kBeginStringBuilder are left // as kBeginStringBuilder while invalid ones are switched to kInvalid). Nodes // are considered "valid" if they are before any kPendingPhi in the string // builder. Put otherwise, switching status from kInStringBuilder to // kConfirmedInStringBuilder is a cheap way of getting rid of kInStringBuilder // nodes that are invalid before one of their predecessor is a kPendingPhi that // was never switched to kInStringBuilder. An example: // // StringConcat [1] // kBeginStringBuilder // | // | // v // -----> Loop Phi [2] --------------- // | kInStringBuilder | // | | | // | | | // | v v // | StringConcat [3] StringConcat [4] // | kInStringBuilder kInStringBuilder // | | | // ----------| | // v // -----> Loop Phi [5] ------------> // | kPendingPhi // | | // | | // | v // | StringConcat [6] // | kInStringBuilder // | | // -----------| // // In this graph, nodes [1], [2], [3] and [4] are part of the string builder. In // particular, node 2 has at some point been assigned the status kPendingPhi // (because all loop phis start as kPendingPhi), but was later switched to // status kInStringBuilder (because its uses inside the loop were compatible // with the string builder), which implicitly made node [3] a valid part of the // string builder. On the other hand, node [5] was never switched to status // kInStringBuilder, which means that it is not valid, and any successor of [5] // isn't valid either (remember that we speculatively set nodes following a // kPendingPhi to kInStringBuilder). Thus, rather than having to iterate through // the successors of kPendingPhi nodes to invalidate them, we simply update the // status of valid nodes to kConfirmedInStringBuilder, after which any // kInStringBuilder node is actually invalid. // // In this function, we also collect all the possible ends for each string // builder (their can be multiple possible ends if there is a branch before the // end of a string builder), as well as where trimming for a given string // builder should be done (either right after the last node, or at the beginning // of the blocks following this node). For an example of string builder with // multiple ends, consider this code: // // let s = "a" + "b" // for (...) { // s += "..."; // } // if (...) return s + "abc"; // else return s + "def"; // // Which would produce a graph that looks like: // // kStringConcat // | // | // v // -------> Loop Phi--------------- // | | | // | | | // | v | // | kStringConcat | // | | | // -------------| | // | // | // ------------------------------------------ // | | // | | // | | // v v // kStringConcat [1] kStringConcat [2] // | | // | | // v v // Return Return // // In this case, both kStringConcat [1] and [2] are valid ends for the string // builder. void StringBuilderOptimizer::FinalizeStringBuilders() { … } void StringBuilderOptimizer::VisitGraph() { … } void StringBuilderOptimizer::Run() { … } StringBuilderOptimizer::StringBuilderOptimizer(JSGraph* jsgraph, Schedule* schedule, Zone* temp_zone, JSHeapBroker* broker) : … { … } } // namespace compiler } // namespace internal } // namespace v8