// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_

#include <optional>

#include "src/base/doubly-threaded-list.h"
#include "src/compiler/turboshaft/analyzer-iterator.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/graph.h"
#include "src/compiler/turboshaft/index.h"
#include "src/compiler/turboshaft/loop-finder.h"
#include "src/compiler/turboshaft/operations.h"
#include "src/compiler/turboshaft/opmasks.h"
#include "src/compiler/turboshaft/phase.h"
#include "src/compiler/turboshaft/representations.h"
#include "src/compiler/turboshaft/sidetable.h"
#include "src/compiler/turboshaft/snapshot-table-opindex.h"
#include "src/compiler/turboshaft/utils.h"
#include "src/zone/zone-containers.h"
#include "src/zone/zone.h"

namespace v8::internal::compiler::turboshaft {

#include "src/compiler/turboshaft/define-assembler-macros.inc"

// Design doc:
// https://docs.google.com/document/d/1AEl4dATNLu8GlLyUBQFXJoCxoAT5BeG7RCWxoEtIBJE/edit?usp=sharing

// Load Elimination removes redundant loads. Loads can be redundant because:
//
//   - they follow a store to the same address. For instance:
//
//       x.a = 42;
//       y = x.a;
//
//   - or, they follow the same load. For instance:
//
//       y = x.a;
//       z = x.a;
//
// The "annoying" part of load elimination is that objects can alias, and
// stores to dynamically computed indices tend to invalidate the whole state.
// For instance, if we don't know anything about aliasing regarding `x` and
// `y`, then, in this situation:
//
//   x.a = 42
//   y.a = 25
//   z = x.a
//
// we can't load-eliminate `z = x.a`, since `y` could alias with `x`, and
// `y.a = 25` could thus have overwritten `x.a`. Similarly, if we have
// something like:
//
//   x[0] = 42
//   y[i] = 25
//   z = x[0]
//
// we can't load-eliminate `z = x[0]`, since `y` could alias with `x`, and
// `y[i] = 25` could thus have overwritten `x[0]`.
//
//
// Implementation:
//
//   - In a `MemoryContentTable` (a SnapshotTable), we keep track of known
//     memory values (a worked example follows this list).
//     * When we visit a Store:
//       + if it's to a constant offset, we invalidate all of the known
//         values at the same offset (for all bases).
//       + if it's to a dynamic index, we invalidate everything (because
//         things could alias).
//       We then update the known value at the address of the store.
//     * When we visit a Call, we invalidate everything (since the function
//       called could change any memory through aliases).
//     * When we visit a Load:
//       + if there is a known value at the address, we replace the Load by
//         this value.
//       + otherwise, the result of the Load becomes the known value at the
//         load address.
//
//   - We keep track (using a SparseOpIndexSnapshotTable) of some objects that
//     are known to not alias with anything: freshly allocated objects, until
//     they are passed to a function call, stored in an object, or flow into a
//     Phi. When storing in a fresh object, we only need to invalidate things
//     in the same object, leaving the rest of the state untouched. When
//     storing in a non-fresh object, we don't invalidate the state for fresh
//     objects.
//
//   - We keep track (using a SparseOpIndexSnapshotTable) of the maps of some
//     objects (which we get from AssumeMap operations, which are inserted
//     when lowering CheckMaps). We use them to know if some objects can
//     alias or not: 2 objects with different maps cannot alias.
//
//   - When a loop contains a Store or a Call, it could invalidate previously
//     eliminated loads in the beginning of the loop. Thus, once we reach the
//     end of a loop, we recompute the header's snapshot using {header,
//     backedge} as predecessors, and if anything is invalidated by the
//     backedge, we revisit the loop.
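//
// As an illustration of the rules above, here is a hypothetical trace of the
// `MemoryContentTable` on a small sequence (not actual compiler output, and
// assuming nothing is known about aliasing between `x` and `y`):
//
//   x.a = 42     // table: {x.a -> 42}
//   x.b = 5      // table: {x.a -> 42, x.b -> 5}
//   y.b = 7      // constant-offset store: invalidates offset b for all
//                // bases, then records y.b; table: {x.a -> 42, y.b -> 7}
//   z = x.a      // hit: the Load is replaced by 42
//   f(x)         // Call: invalidates everything; table: {}
//   w = x.a      // miss: the Load stays, and the table records {x.a -> w}
//
// If `x` were instead a freshly allocated object that hadn't escaped yet, the
// store `y.b = 7` would not invalidate `x.b`, since storing in a non-fresh
// object doesn't invalidate the state of fresh objects (`f(x)` would still
// invalidate `x`'s entries, though, since `x` escapes there).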
//
// How we "keep track" of objects:
//
// We need the following operations:
//   1. Load the value for a {base, index, offset}.
//   2. Store that {base, index, offset} = value.
//   3. Invalidate everything at a given offset + everything at an index (for
//      when storing to a base that could alias with other things).
//   4. Invalidate everything in a base (for when said base is passed to a
//      function, or when there is an indexed store in this base).
//   5. Invalidate everything (for an indexed store into an arbitrary base).
//
// To have 1. in constant time, we maintain a global hashmap (`all_keys`) from
// MemoryAddress (= {base, index, offset, element_size_log2, size}) to Keys,
// and from these Keys, we have constant-time lookup in the SnapshotTable.
// To have 3. efficiently, we maintain a Map from offsets to lists of every
// MemoryAddress at this offset (`offset_keys_`).
// To have 4. efficiently, we have a similar map from bases to lists of every
// MemoryAddress at this base (`base_keys_`).
// For 5., we can use either `offset_keys_` or `base_keys_`. In practice, we
// use the latter, because it allows us to efficiently skip bases that are
// known to have no aliases.

// MapMask and related functions are an attempt to avoid having to store sets
// of maps for each AssumeMap that we encounter, by compressing all of the
// maps into a single uint64_t.
//
// For each object, we keep in a MapMaskAndOr the "minimum" and "maximum" of
// all of its potential maps, where
//   - "or_" is computed using the union (logical or) of all of its potential
//     maps.
//   - "and_" is computed using the intersection (logical and) of all of its
//     potential maps.
//
// Then, given two objects A and B, if A.and_ has a bit set that isn't set in
// B.or_, it means that all of the maps of A have a bit that none of the maps
// of B have, ie, A and B are guaranteed to not have a map in common.
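//
// As a small worked example (using hypothetical 4-bit masks rather than real
// 64-bit map hashes): suppose A's potential maps hash to 0b1010 and 0b1011,
// while B's potential maps hash to 0b0101 and 0b0100. Then:
//
//   A.and_ = 0b1010 & 0b1011 = 0b1010     A.or_ = 0b1010 | 0b1011 = 0b1011
//   B.and_ = 0b0101 & 0b0100 = 0b0100     B.or_ = 0b0101 | 0b0100 = 0b0101
//
// A.and_ has its top bit set while B.or_ doesn't, so every map of A has a bit
// that no map of B has: A and B cannot have a map in common. Note that this
// check is one-sided: overlapping masks don't imply a common map, so it can
// only ever rule aliasing out, never prove it.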
using MapMask = uint64_t;

struct MapMaskAndOr { … };

inline bool is_empty(MapMaskAndOr minmax) { … }
inline MapMask ComputeMapHash(MapRef map) { … }
inline MapMaskAndOr ComputeMinMaxHash(ZoneRefSet<Map> maps) { … }
inline MapMaskAndOr CombineMinMax(MapMaskAndOr a, MapMaskAndOr b) { … }

// Returns true if {a} and {b} could have a map in common.
inline bool CouldHaveSameMap(MapMaskAndOr a, MapMaskAndOr b) { … }

// A memory address is the tuple {base, index, offset, element_size_log2,
// size} (cf. the description of `all_keys` above).
struct MemoryAddress { … };

inline size_t hash_value(MemoryAddress const& mem) { … }

// Data attached to each key of the MemoryContentTable.
struct KeyData { … };

// Intrusive-list traits used to thread keys through the per-offset
// (`offset_keys_`) and per-base (`base_keys_`) lists described above.
struct OffsetListTraits { … };

struct BaseListTraits { … };

// The keys that share a given base (the values of `base_keys_`).
struct BaseData { … };

// Describes the replacement (if any) that the analyzer computed for an
// operation.
class LoadEliminationReplacement { … };

// Returns whether the Change at {change_idx} is part of a pattern where a
// loaded value is only observed truncated to int32 (possibly through an
// intermediate bitcast); on a match, {bitcast_idx} and {load_idx} (when
// non-null) are set to the matched operations.
V8_EXPORT_PRIVATE bool IsInt32TruncatedLoadPattern(
    const Graph& graph, OpIndex change_idx, const ChangeOp& change,
    OpIndex* bitcast_idx = nullptr, OpIndex* load_idx = nullptr);

class MemoryContentTable
    : public ChangeTrackingSnapshotTable<MemoryContentTable, OpIndex,
                                         KeyData> { … };

class V8_EXPORT_PRIVATE LateLoadEliminationAnalyzer { … };

template <class Next>
class V8_EXPORT_PRIVATE LateLoadEliminationReducer : public Next { … };

#include "src/compiler/turboshaft/undef-assembler-macros.inc"

}  // namespace v8::internal::compiler::turboshaft

#endif  // V8_COMPILER_TURBOSHAFT_LATE_LOAD_ELIMINATION_REDUCER_H_