// Copyright 2022 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_ #define V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_ #include "src/compiler/turboshaft/assembler.h" #include "src/compiler/turboshaft/graph.h" #include "src/compiler/turboshaft/index.h" #include "src/compiler/turboshaft/operations.h" #include "src/compiler/turboshaft/value-numbering-reducer.h" namespace v8::internal::compiler::turboshaft { // DuplicationOptimizationReducer introduces duplication where this can be // beneficial for generated code. It should run late in the pipeline so that the // duplication isn't optimized away by some other phases (such as GVN). // // In particular, it introduces duplication in 2 places: // // 1. Branch condition duplication: it tries to ensure that the condition nodes // of branches are used only once (under some conditions). When it finds a // branch node whose condition has multiple uses, this condition is duplicated. // // Doing this enables the InstructionSelector to generate more efficient code // for branches. For instance, consider this code: // // c = a + b; // if (c == 0) { /* some code */ } // if (c == 0) { /* more code */ } // // Then the generated code will be something like (using registers "ra" for "a" // and "rb" for "b", and "rt" a temporary register): // // add ra, rb ; a + b // cmp ra, 0 ; a + b == 0 // sete rt ; rt = (a + b == 0) // cmp rt, 0 ; rt == 0 // jz // ... // cmp rt, 0 ; rt == 0 // jz // // As you can see, TurboFan materialized the == bit into a temporary register. // However, since the "add" instruction sets the ZF flag (on x64), it can be // used to determine whether the jump should be taken or not. The code we'd like // to generate instead is thus: // // add ra, rb // jnz // ... 
// add ra, rb // jnz // // However, this requires generating the instruction "add ra, rb" twice. Due to // how virtual registers are assigned in TurboFan (there is a map from node ID // to virtual registers), both "add" instructions will use the same virtual // register as output, which will break SSA. // // In order to overcome this issue, BranchConditionDuplicator duplicates branch // conditions that are used more than once, so that they can be generated right // before each branch without worrying about breaking SSA. // // 2. Load/Store flexible second operand duplication: on Arm64, it tries to // duplicate the "index" input of Loads/Stores when it's a shift by a constant. // This allows the Instruction Selector to compute said shift using a flexible // second operand, which in most cases on recent Arm64 CPUs should be for free. #include "src/compiler/turboshaft/define-assembler-macros.inc" template <class Next> class DuplicationOptimizationReducer : public Next { … }; #include "src/compiler/turboshaft/undef-assembler-macros.inc" } // namespace v8::internal::compiler::turboshaft #endif // V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_