chromium/v8/src/compiler/turboshaft/duplication-optimization-reducer.h

// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_

#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/index.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/value-numbering-reducer.h"

namespace v8::internal::compiler::turboshaft {

// DuplicationOptimizationReducer introduces duplication where this can be
// beneficial for generated code. It should run late in the pipeline so that the
// duplication isn't optimized away by some other phases (such as GVN).
//
// In particular, it introduces duplication in 2 places:
//
// 1. Branch condition duplication: it tries to ensure that the condition nodes
// of branches are used only once (under some conditions). When it finds a
// branch node whose condition has multiples uses, this condition is duplicated.
//
// Doing this enables the InstructionSelector to generate more efficient code
// for branches. For instance, consider this code:
//
//     c = a + b;
//     if (c == 0) { /* some code */ }
//     if (c == 0) { /* more code */ }
//
// Then the generated code will be something like (using registers "ra" for "a"
// and "rb" for "b", and "rt" a temporary register):
//
//     add ra, rb  ; a + b
//     cmp ra, 0   ; a + b == 0
//     sete rt     ; rt = (a + b == 0)
//     cmp rt, 0   ; rt == 0
//     jz
//     ...
//     cmp rt, 0   ; rt == 0
//     jz
//
// As you can see, TurboFan materialized the == bit into a temporary register.
// However, since the "add" instruction sets the ZF flag (on x64), it can be
// used to determine wether the jump should be taken or not. The code we'd like
// to generate instead if thus:
//
//     add ra, rb
//     jnz
//     ...
//     add ra, rb
//     jnz
//
// However, this requires to generate twice the instruction "add ra, rb". Due to
// how virtual registers are assigned in TurboFan (there is a map from node ID
// to virtual registers), both "add" instructions will use the same virtual
// register as output, which will break SSA.
//
// In order to overcome this issue, BranchConditionDuplicator duplicates branch
// conditions that are used more than once, so that they can be generated right
// before each branch without worrying about breaking SSA.
//
// 2. Load/Store flexible second operand duplication: on Arm64, it tries to
// duplicate the "index" input of Loads/Stores when it's a shift by a constant.
// This allows the Instruction Selector to compute said shift using a flexible
// second operand, which in most cases on recent Arm64 CPUs should be for free.

#include "src/compiler/turboshaft/define-assembler-macros.inc"

template <class Next>
class DuplicationOptimizationReducer : public Next {};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_DUPLICATION_OPTIMIZATION_REDUCER_H_