// chromium/v8/src/compiler/turboshaft/late-load-elimination-reducer.h

// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_

#include <optional>

#include "src/base/doubly-threaded-list.h"
#include "src/compiler/turboshaft/analyzer-iterator.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/index.h"
#include "src/compiler/turboshaft/loop-finder.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/opmasks.h"
#include "src/compiler/turboshaft/phase.h"
#include "src/compiler/turboshaft/representations.h"
#include "src/compiler/turboshaft/sidetable.h"
#include "src/compiler/turboshaft/snapshot-table-opindex.h"
#include "src/compiler/turboshaft/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone.h"

namespace v8::internal::compiler::turboshaft {

#include "src/compiler/turboshaft/define-assembler-macros.inc"

// Design doc:
// https://docs.google.com/document/d/1AEl4dATNLu8GlLyUBQFXJoCxoAT5BeG7RCWxoEtIBJE/edit?usp=sharing

// Load Elimination removes redundant loads. Loads can be redundant because:
//
//   - they follow a store to the same address. For instance:
//
//       x.a = 42;
//       y = x.a;
//
//   - or, they follow the same load. For instance:
//
//       y = x.a;
//       z = x.a;
//
// The "annoying" part of load elimination is that object can alias, and stores
// to dynamically computed indices tend to invalidate the whole state. For
// instance, if we don't know anything about aliasing regarding `a` and `b`,
// then, in this situation:
//
//     x.a = 42
//     y.a = 25
//     z = x.a
//
// We can't load-eliminate `z = x.a`, since `y` could alias with `x`, and `y.a =
// 25` could have overwritten `x.a`. Similarly, if we have something like:
//
//     x[0] = 42
//     y[i] = 25
//     z = x[0]
//
// We can't load-eliminate `z = x[0]`, since `y` could alias with `x`, and
// `y[i] = 25` could thus have overwritten `x[0]`.
//
//
// Implementation:
//
//   - In a `MemoryContentTable` (a SnapshotTable), we keep track of known
//     memory values.
//     * When we visit a Store:
//       + if it's to a constant offset, we invalidate all of the known values
//         at the same offset (for all bases).
//       + if it's to a dynamic index, we invalidate everything (because things
//         could alias).
//       We then update the known value at the address of the store.
//     * When we visit a Call, we invalidate everything (since the function
//       called could change any memory through aliases).
//     * When we visit a Load:
//       + if there is a known value at the address, we replace the Load by this
//         value.
//       + otherwise, the result of the Load becomes the known value at the load
//         address.
//
//   - We keep track (using a SparseOpIndexSnapshotTable) of some objects that
//     are known to not alias with anything: freshly allocated objects, until
//     they are passed to a function call, stored in an object, or flow into a
//     Phi. When storing into a fresh object, we only need to invalidate things
//     in the same object, leaving the rest of the state untouched. When
//     storing into a non-fresh object, we don't invalidate the state for fresh
//     objects.
//
//   - We keep track (using a SparseOpIndexSnapshotTable) of the maps of some
//     objects (which we get from AssumeMap operations, which are inserted when
//     lowering CheckMaps). We use them to know if some objects can alias or
//     not: two objects with different maps cannot alias.
//
//   - When a loop contains a Store or a Call, it could invalidate previously
//     eliminated loads at the beginning of the loop. Thus, once we reach the
//     end of a loop, we recompute the header's snapshot using {header,
//     backedge} as predecessors, and if anything is invalidated by the
//     backedge, we revisit the loop.
//
// How we "keep track" of objects:
//
// We need the following operations:
//     1. Load the value for a given {base, index, offset}.
//     2. Record that {base, index, offset} now contains {value}.
//     3. Invalidate everything at a given offset + everything at an index (for
//        when storing to a base that could alias with other things).
//     4. Invalidate everything in a base (for when said base is passed to a
//        function, or when there is an indexed store into this base).
//     5. Invalidate everything (for an indexed store into an arbitrary base).
//
// To have 1. in constant time, we maintain a global hashmap (`all_keys`) from
// MemoryAddress (= {base, index, offset, element_size_log2, size}) to Keys, and
// from these Keys, we have constant-time lookup in the SnapshotTable.
// To have 3. efficiently, we maintain a map from offsets to lists of every
// MemoryAddress at this offset (`offset_keys_`).
// To have 4. efficiently, we have a similar map from bases to lists of every
// MemoryAddress at this base (`base_keys_`).
// For 5., we can use either `offset_keys_` or `base_keys_`. In practice, we use
// the latter because it allows us to efficiently skip bases that are known to
// have no aliases. A simplified sketch of this structure, and of the
// per-operation handling described above, follows.
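
// A self-contained sketch of this lookup structure, with MemoryAddress
// simplified to {base, offset} and std:: containers standing in for V8's
// SnapshotTable and DoublyThreadedList. All `Sketch*` names are illustrative,
// not the analyzer's real interface; this assumes <cstdint>, <unordered_map>
// and <vector>.
struct SketchAddress {
  uint32_t base;
  uint32_t offset;
  bool operator==(const SketchAddress& other) const {
    return base == other.base && offset == other.offset;
  }
};
struct SketchAddressHash {
  size_t operator()(const SketchAddress& a) const {
    return std::hash<uint64_t>()((uint64_t{a.base} << 32) | a.offset);
  }
};
struct SketchMemoryTable {
  // Operations 1. and 2.: constant-time lookup and update of known values.
  std::unordered_map<SketchAddress, uint32_t, SketchAddressHash> all_keys;
  // Operations 3. and 4.: secondary indexes, so that we can invalidate by
  // offset or by base without scanning the whole table.
  std::unordered_map<uint32_t, std::vector<SketchAddress>> offset_keys;
  std::unordered_map<uint32_t, std::vector<SketchAddress>> base_keys;

  void Set(const SketchAddress& addr, uint32_t value) {
    bool is_new = all_keys.insert_or_assign(addr, value).second;
    if (is_new) {
      offset_keys[addr.offset].push_back(addr);
      base_keys[addr.base].push_back(addr);
    }
  }
  void InvalidateAtOffset(uint32_t offset) {
    for (const SketchAddress& a : offset_keys[offset]) all_keys.erase(a);
    offset_keys.erase(offset);
  }
  void InvalidateBase(uint32_t base) {
    for (const SketchAddress& a : base_keys[base]) all_keys.erase(a);
    base_keys.erase(base);
  }
  void InvalidateEverything() {  // Operation 5.
    all_keys.clear();
    offset_keys.clear();
    base_keys.clear();
  }
  // Note: invalidating by offset leaves stale entries in `base_keys` (and vice
  // versa); re-erasing an already-erased key later is a harmless no-op here.
  // V8 avoids this staleness by threading each key through intrusive lists.
};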

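// The per-operation handling from the "Implementation" section above, applied
// to the sketched table. Again purely illustrative: `ProcessStore`,
// `ProcessCall` and `ProcessLoad` are hypothetical names, values are plain
// uint32_t instead of OpIndex, and the fresh-object and map-based alias
// tracking are omitted.
struct AnalyzerSketch {
  SketchMemoryTable memory_;

  void ProcessStore(const SketchAddress& addr, bool offset_is_constant,
                    uint32_t value) {
    if (offset_is_constant) {
      // Another base could alias with `addr.base`, so every known value at
      // this offset (whatever its base) becomes stale.
      memory_.InvalidateAtOffset(addr.offset);
    } else {
      // Dynamically computed index: anything could have been overwritten.
      memory_.InvalidateEverything();
    }
    memory_.Set(addr, value);  // The stored value is now known.
  }

  void ProcessCall() {
    // The callee could change any memory through aliases.
    memory_.InvalidateEverything();
  }

  // Returns the known value to use instead of the load, if there is one.
  std::optional<uint32_t> ProcessLoad(const SketchAddress& addr,
                                      uint32_t load_result) {
    if (auto it = memory_.all_keys.find(addr); it != memory_.all_keys.end()) {
      return it->second;  // Redundant load: reuse the known value.
    }
    memory_.Set(addr, load_result);  // Remember the result for later loads.
    return std::nullopt;
  }
};

// For the loop handling in the last bullet of the "Implementation" section: a
// merge keeps only the facts that hold on every predecessor, so if merging the
// backedge state into the loop header's state loses any fact, the loop has to
// be revisited. (Sketch only; V8 does this with snapshots, not copies.)
inline SketchMemoryTable MergeSketch(const SketchMemoryTable& a,
                                     const SketchMemoryTable& b) {
  SketchMemoryTable result;
  for (const auto& [addr, value] : a.all_keys) {
    auto it = b.all_keys.find(addr);
    if (it != b.all_keys.end() && it->second == value) result.Set(addr, value);
  }
  return result;
}
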
// MapMask and related functions are an attempt to avoid having to store sets of
// maps for each AssumeMap that we encounter by compressing all of the maps into
// a single uint64_t.
//
// For each object, we keep in a MapMaskAndOr the "minimum" and "maximum" of
// all of its potential maps, where
//   - "or_" is computed using the union (logical or) of all of its potential
//     maps.
//   - "and_" is computed using the intersection (logical and) of all of its
//     potential maps.
//
// Then, given two objects A and B, if A.and_ has a bit set that isn't set in
// B.or_, it means that all of the maps of A have a bit that none of the maps of
// B have, ie, A and B are guaranteed to not have a map in common.
using MapMask = uint64_t;
struct MapMaskAndOr {};
inline bool is_empty(MapMaskAndOr minmax) {}
inline MapMask ComputeMapHash(MapRef map) {}
inline MapMaskAndOr ComputeMinMaxHash(ZoneRefSet<Map> maps) {}
inline MapMaskAndOr CombineMinMax(MapMaskAndOr a, MapMaskAndOr b) {}
// Returns true if {a} and {b} could have a map in common.
inline bool CouldHaveSameMap(MapMaskAndOr a, MapMaskAndOr b) {}
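
// A self-contained sketch of the min/max-mask logic described above (the real
// bodies are elided in this excerpt). Only the bit-logic spelled out by the
// comment is reproduced here, under illustrative names in their own namespace.
namespace map_mask_sketch {
using MapMask = uint64_t;
struct MapMaskAndOr {
  MapMask or_ = 0;       // union of the hashes of all potential maps
  MapMask and_ = ~0ull;  // intersection of the hashes of all potential maps
};
// Folds one more potential map (given by its hash) into the summary.
inline MapMaskAndOr AddMap(MapMaskAndOr acc, MapMask map_hash) {
  return {acc.or_ | map_hash, acc.and_ & map_hash};
}
// Merging two sets of potential maps unions the "or_"s and intersects the
// "and_"s.
inline MapMaskAndOr CombineMinMax(MapMaskAndOr a, MapMaskAndOr b) {
  return {a.or_ | b.or_, a.and_ & b.and_};
}
// {a} and {b} can only have a map in common if every bit shared by all of
// {a}'s maps (a.and_) also occurs in some map of {b} (b.or_), and
// symmetrically; otherwise they are guaranteed disjoint.
inline bool CouldHaveSameMap(MapMaskAndOr a, MapMaskAndOr b) {
  return (a.and_ & ~b.or_) == 0 && (b.and_ & ~a.or_) == 0;
}
}  // namespace map_mask_sketch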

struct MemoryAddress {};

inline size_t hash_value(MemoryAddress const& mem) {}

struct KeyData {};

struct OffsetListTraits {};

struct BaseListTraits {};

struct BaseData {};

class LoadEliminationReplacement {};

V8_EXPORT_PRIVATE bool IsInt32TruncatedLoadPattern(
    const Graph& graph, OpIndex change_idx, const ChangeOp& change,
    OpIndex* bitcast_idx = nullptr, OpIndex* load_idx = nullptr);

class MemoryContentTable
    : public ChangeTrackingSnapshotTable<MemoryContentTable, OpIndex, KeyData> {};

class V8_EXPORT_PRIVATE LateLoadEliminationAnalyzer {};

template <class Next>
class V8_EXPORT_PRIVATE LateLoadEliminationReducer : public Next {};

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_