//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the TypeBasedAliasAnalysis pass, which implements // metadata-based TBAA. // // In LLVM IR, memory does not have types, so LLVM's own type system is not // suitable for doing TBAA. Instead, metadata is added to the IR to describe // a type system of a higher level language. This can be used to implement // typical C/C++ TBAA, but it can also be used to implement custom alias // analysis behavior for other languages. // // We now support two types of metadata format: scalar TBAA and struct-path // aware TBAA. After all testing cases are upgraded to use struct-path aware // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA // can be dropped. // // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to // three fields, e.g.: // !0 = !{ !"an example type tree" } // !1 = !{ !"int", !0 } // !2 = !{ !"float", !0 } // !3 = !{ !"const float", !2, i64 1 } // // The first field is an identity field. It can be any value, usually // an MDString, which uniquely identifies the type. The most important // name in the tree is the name of the root node. Two trees with // different root node names are entirely disjoint, even if they // have leaves with common names. // // The second field identifies the type's parent node in the tree, or // is null or omitted for a root node. A type is considered to alias // all of its descendants and all of its ancestors in the tree. Also, // a type is considered to alias all types in other trees, so that // bitcode produced from multiple front-ends is handled conservatively. // // If the third field is present, it's an integer which if equal to 1 // indicates that the type is "constant" (meaning pointsToConstantMemory // should return true; see // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). // // With struct-path aware TBAA, the MDNodes attached to an instruction using // "!tbaa" are called path tag nodes. // // The path tag node has 4 fields with the last field being optional. // // The first field is the base type node, it can be a struct type node // or a scalar type node. The second field is the access type node, it // must be a scalar type node. The third field is the offset into the base type. // The last field has the same meaning as the last field of our scalar TBAA: // it's an integer which if equal to 1 indicates that the access is "constant". // // The struct type node has a name and a list of pairs, one pair for each member // of the struct. The first element of each pair is a type node (a struct type // node or a scalar type node), specifying the type of the member, the second // element of each pair is the offset of the member. // // Given an example // typedef struct { // short s; // } A; // typedef struct { // uint16_t s; // A a; // } B; // // For an access to B.a.s, we attach !5 (a path tag node) to the load/store // instruction. The base type is !4 (struct B), the access type is !2 (scalar // type short) and the offset is 4. // // !0 = !{!"Simple C/C++ TBAA"} // !1 = !{!"omnipotent char", !0} // Scalar type node // !2 = !{!"short", !1} // Scalar type node // !3 = !{!"A", !2, i64 0} // Struct type node // !4 = !{!"B", !2, i64 0, !3, i64 4} // // Struct type node // !5 = !{!4, !2, i64 4} // Path tag node // // The struct type nodes and the scalar type nodes form a type DAG. // Root (!0) // char (!1) -- edge to Root // short (!2) -- edge to char // A (!3) -- edge with offset 0 to short // B (!4) -- edge with offset 0 to short and edge with offset 4 to A // // To check if two tags (tagX and tagY) can alias, we start from the base type // of tagX, follow the edge with the correct offset in the type DAG and adjust // the offset until we reach the base type of tagY or until we reach the Root // node. // If we reach the base type of tagY, compare the adjusted offset with // offset of tagY, return Alias if the offsets are the same, return NoAlias // otherwise. // If we reach the Root node, perform the above starting from base type of tagY // to see if we reach base type of tagX. // // If they have different roots, they're part of different potentially // unrelated type systems, so we return Alias to be conservative. // If neither node is an ancestor of the other and they have the same root, // then we say NoAlias. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> #include <cstdint> usingnamespacellvm; // A handy option for disabling TBAA functionality. The same effect can also be // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden); namespace { /// isNewFormatTypeNode - Return true iff the given type node is in the new /// size-aware format. static bool isNewFormatTypeNode(const MDNode *N) { … } /// This is a simple wrapper around an MDNode which provides a higher-level /// interface by hiding the details of how alias analysis information is encoded /// in its operands. template<typename MDNodeTy> class TBAANodeImpl { … }; /// \name Specializations of \c TBAANodeImpl for const and non const qualified /// \c MDNode. /// @{ TBAANode; MutableTBAANode; /// @} /// This is a simple wrapper around an MDNode which provides a /// higher-level interface by hiding the details of how alias analysis /// information is encoded in its operands. template<typename MDNodeTy> class TBAAStructTagNodeImpl { … }; /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const /// qualified \c MDNods. /// @{ TBAAStructTagNode; MutableTBAAStructTagNode; /// @} /// This is a simple wrapper around an MDNode which provides a /// higher-level interface by hiding the details of how alias analysis /// information is encoded in its operands. class TBAAStructTypeNode { … }; } // end anonymous namespace /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA /// format. static bool isStructPathTBAA(const MDNode *MD) { … } AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI, const Instruction *) { … } ModRefInfo TypeBasedAAResult::getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI, bool IgnoreLocals) { … } MemoryEffects TypeBasedAAResult::getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI) { … } MemoryEffects TypeBasedAAResult::getMemoryEffects(const Function *F) { … } ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, AAQueryInfo &AAQI) { … } ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1, const CallBase *Call2, AAQueryInfo &AAQI) { … } bool MDNode::isTBAAVtableAccess() const { … } static bool matchAccessTags(const MDNode *A, const MDNode *B, const MDNode **GenericTag = nullptr); MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { … } static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) { … } AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const { … } AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const { … } static const MDNode *createAccessTag(const MDNode *AccessType) { … } static bool hasField(TBAAStructTypeNode BaseType, TBAAStructTypeNode FieldType) { … } /// Return true if for two given accesses, one of the accessed objects may be a /// subobject of the other. The \p BaseTag and \p SubobjectTag parameters /// describe the accesses to the base object and the subobject respectively. /// \p CommonType must be the metadata node describing the common type of the /// accessed objects. On return, \p MayAlias is set to true iff these accesses /// may alias and \p Generic, if not null, points to the most generic access /// tag for the given two. static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag, TBAAStructTagNode SubobjectTag, const MDNode *CommonType, const MDNode **GenericTag, bool &MayAlias) { … } /// matchTags - Return true if the given couple of accesses are allowed to /// overlap. If \arg GenericTag is not null, then on return it points to the /// most generic access descriptor for the given two. static bool matchAccessTags(const MDNode *A, const MDNode *B, const MDNode **GenericTag) { … } /// Aliases - Test whether the access represented by tag A may alias the /// access represented by tag B. bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { … } AnalysisKey TypeBasedAA::Key; TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { … } char TypeBasedAAWrapperPass::ID = …; INITIALIZE_PASS(…) ImmutablePass *llvm::createTypeBasedAAWrapperPass() { … } TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : … { … } bool TypeBasedAAWrapperPass::doInitialization(Module &M) { … } bool TypeBasedAAWrapperPass::doFinalization(Module &M) { … } void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { … } MDNode *AAMDNodes::shiftTBAA(MDNode *MD, size_t Offset) { … } MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) { … } MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { … } AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { … } AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL) { … } AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) { … }