//===- IRSimilarityIdentifierTest.cpp - IRSimilarityIdentifier unit tests -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Tests for components for finding similarity such as the instruction mapper, // suffix tree usage, and structural analysis. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" usingnamespacellvm; usingnamespaceIRSimilarity; extern llvm::cl::opt<bool> UseNewDbgInfoFormat; extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat; extern bool WriteNewDbgInfoFormatToBitcode; extern cl::opt<bool> WriteNewDbgInfoFormat; static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context, StringRef ModuleStr) { … } void getVectors(Module &M, IRInstructionMapper &Mapper, std::vector<IRInstructionData *> &InstrList, std::vector<unsigned> &UnsignedVec) { … } void getSimilarities( Module &M, std::vector<std::vector<IRSimilarityCandidate>> &SimilarityCandidates) { … } // Checks that different opcodes are mapped to different values TEST(IRInstructionMapper, OpcodeDifferentiation) { … } // Checks that the same opcodes and types are mapped to the same values. TEST(IRInstructionMapper, OpcodeTypeSimilarity) { … } // Checks that the same opcode and different types are mapped to different // values. TEST(IRInstructionMapper, TypeDifferentiation) { … } // Checks that different predicates map to different values. TEST(IRInstructionMapper, PredicateDifferentiation) { … } // Checks that predicates where that can be considered the same when the // operands are swapped, i.e. greater than to less than are mapped to the same // unsigned integer. TEST(IRInstructionMapper, PredicateIsomorphism) { … } // Checks that the same predicate maps to the same value. TEST(IRInstructionMapper, PredicateSimilarity) { … } // Checks that the same predicate maps to the same value for floating point // CmpInsts. TEST(IRInstructionMapper, FPPredicateSimilarity) { … } // Checks that the different predicate maps to a different value for floating // point CmpInsts. TEST(IRInstructionMapper, FPPredicatDifference) { … } // Checks that the zexts that have the same type parameters map to the same // unsigned integer. TEST(IRInstructionMapper, ZextTypeSimilarity) { … } // Checks that the sexts that have the same type parameters map to the same // unsigned integer. TEST(IRInstructionMapper, SextTypeSimilarity) { … } // Checks that the zexts that have the different type parameters map to the // different unsigned integers. TEST(IRInstructionMapper, ZextTypeDifference) { … } // Checks that the sexts that have the different type parameters map to the // different unsigned integers. TEST(IRInstructionMapper, SextTypeDifference) { … } // Checks that loads that have the same type are mapped to the same unsigned // integer. TEST(IRInstructionMapper, LoadSimilarType) { … } // Checks that loads that have the different types are mapped to // different unsigned integers. TEST(IRInstructionMapper, LoadDifferentType) { … } // Checks that loads that have the different aligns are mapped to different // unsigned integers. TEST(IRInstructionMapper, LoadDifferentAlign) { … } // Checks that loads that have the different volatile settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, LoadDifferentVolatile) { … } // Checks that loads that have the same volatile settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, LoadSameVolatile) { … } // Checks that loads that have the different atomicity settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, LoadDifferentAtomic) { … } // Checks that loads that have the same atomicity settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, LoadSameAtomic) { … } // Checks that stores that have the same type are mapped to the same unsigned // integer. TEST(IRInstructionMapper, StoreSimilarType) { … } // Checks that stores that have the different types are mapped to // different unsigned integers. TEST(IRInstructionMapper, StoreDifferentType) { … } // Checks that stores that have the different aligns are mapped to different // unsigned integers. TEST(IRInstructionMapper, StoreDifferentAlign) { … } // Checks that stores that have the different volatile settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, StoreDifferentVolatile) { … } // Checks that stores that have the same volatile settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, StoreSameVolatile) { … } // Checks that loads that have the same atomicity settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, StoreSameAtomic) { … } // Checks that loads that have the different atomicity settings are mapped to // different unsigned integers. TEST(IRInstructionMapper, StoreDifferentAtomic) { … } // Checks that the branch is mapped to legal when the option is set. TEST(IRInstructionMapper, BranchLegal) { … } // Checks that a PHINode is mapped to be legal. TEST(IRInstructionMapper, PhiLegal) { … } // Checks that a PHINode is mapped to be legal. TEST(IRInstructionMapper, PhiIllegal) { … } // In most cases, the illegal instructions we are collecting don't require any // sort of setup. In these cases, we can just only have illegal instructions, // and the mapper will create 0 length vectors, and we can check that. // In cases where we have legal instructions needed to set up the illegal // instruction, to check illegal instructions are assigned unsigned integers // from the maximum value decreasing to 0, it will be greater than a legal // instruction that comes after. So to check that we have an illegal // instruction, we place a legal instruction after an illegal instruction, and // check that the illegal unsigned integer is greater than the unsigned integer // of the legal instruction. // Checks that an alloca instruction is mapped to be illegal. TEST(IRInstructionMapper, AllocaIllegal) { … } // Checks that an getelementptr instruction is mapped to be legal. And that // the operands in getelementpointer instructions are the exact same after the // first element operand, which only requires the same type. TEST(IRInstructionMapper, GetElementPtrSameEndOperands) { … } // Check that when the operands in getelementpointer instructions are not the // exact same after the first element operand, the instructions are mapped to // different values. TEST(IRInstructionMapper, GetElementPtrDifferentEndOperands) { … } // Check that when the operands in getelementpointer instructions are not the // same initial base type, each instruction is mapped to a different value. TEST(IRInstructionMapper, GetElementPtrDifferentBaseType) { … } // Check that when the operands in getelementpointer instructions do not have // the same inbounds modifier, they are not counted as the same. TEST(IRInstructionMapper, GetElementPtrDifferentInBounds) { … } // Checks that indirect call instructions are mapped to be illegal when it is // specified to disallow them. TEST(IRInstructionMapper, CallsIllegalIndirect) { … } // Checks that indirect call instructions are mapped to be legal when it is not // specified to disallow them. TEST(IRInstructionMapper, CallsLegalIndirect) { … } // Checks that a call instruction is mapped to be legal. Here we check that // a call with the same name, and same types are mapped to the same // value. TEST(IRInstructionMapper, CallsSameTypeSameName) { … } // Here we check that a calls with different names, but the same arguments types // are mapped to different value when specified that the name must match. TEST(IRInstructionMapper, CallsSameArgTypeDifferentNameDisallowed) { … } // Here we check that a calls with different names, but the same arguments types // are mapped to the same value when it is not specifed that they must match. TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) { … } // Here we check that a calls with different names, and different arguments // types are mapped to different value. TEST(IRInstructionMapper, CallsDifferentArgTypeDifferentName) { … } // Here we check that calls with different names, and different return // types are mapped to different value. TEST(IRInstructionMapper, CallsDifferentReturnTypeDifferentName) { … } // Here we check that calls with the same name, types, and parameters map to the // same unsigned integer. TEST(IRInstructionMapper, CallsSameParameters) { … } // Here we check that calls with different tail call settings are mapped to // different values. TEST(IRInstructionMapper, CallsDifferentTails) { … } // Here we check that calls with different calling convention settings are // mapped to different values. TEST(IRInstructionMapper, CallsDifferentCallingConventions) { … } // Checks that an invoke instruction is mapped to be illegal. Invoke // instructions are considered to be illegal because of the change in the // control flow that is currently not recognized. TEST(IRInstructionMapper, InvokeIllegal) { … } // Checks that an callbr instructions are considered to be illegal. Callbr // instructions are considered to be illegal because of the change in the // control flow that is currently not recognized. TEST(IRInstructionMapper, CallBrInstIllegal) { … } // Checks that an debuginfo records are mapped to be invisible. Since they // do not semantically change the program, they can be recognized as similar. TEST(IRInstructionMapper, DebugInfoInvisible) { … } // The following are all exception handling intrinsics. We do not currently // handle these instruction because they are very context dependent. // Checks that an eh.typeid.for intrinsic is mapped to be illegal. TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) { … } // Checks that an eh.exceptioncode intrinsic is mapped to be illegal. TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) { … } // Checks that an eh.unwind intrinsic is mapped to be illegal. TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) { … } // Checks that an eh.exceptionpointer intrinsic is mapped to be illegal. TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) { … } // Checks that a catchpad instruction is mapped to an illegal value. TEST(IRInstructionMapper, CatchpadIllegal) { … } // Checks that a cleanuppad instruction is mapped to an illegal value. TEST(IRInstructionMapper, CleanuppadIllegal) { … } // The following three instructions are memory transfer and setting based, which // are considered illegal since is extra checking needed to handle the address // space checking. // Checks that a memset instruction is mapped to an illegal value when // specified. TEST(IRInstructionMapper, MemSetIllegal) { … } // Checks that a memcpy instruction is mapped to an illegal value when // specified. TEST(IRInstructionMapper, MemCpyIllegal) { … } // Checks that a memmove instruction is mapped to an illegal value when // specified. TEST(IRInstructionMapper, MemMoveIllegal) { … } // Checks that mem* instructions are mapped to an legal value when not // specified, and that all the intrinsics are marked differently. TEST(IRInstructionMapper, MemOpsLegal) { … } // Checks that a variable argument instructions are mapped to an illegal value. // We exclude variable argument instructions since variable arguments // requires extra checking of the argument list. TEST(IRInstructionMapper, VarArgsIllegal) { … } // Check the length of adding two illegal instructions one after th other. We // should find that only one element is added for each illegal range. TEST(IRInstructionMapper, RepeatedIllegalLength) { … } // A helper function that accepts an instruction list from a module made up of // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList, bool Structure = false, unsigned Length = 2, unsigned StartIdxOne = 0, unsigned StartIdxTwo = 3) { … } // Checks that two adds with commuted operands are considered to be the same // instructions. TEST(IRSimilarityCandidate, CheckIdenticalInstructions) { … } // Checks that comparison instructions are found to be similar instructions // when the operands are flipped and the predicate is also swapped. TEST(IRSimilarityCandidate, PredicateIsomorphism) { … } // Checks that IRSimilarityCandidates wrapping these two regions of instructions // are able to differentiate between instructions that have different opcodes. TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) { … } // Checks that IRSimilarityCandidates wrapping these two regions of instructions // are able to differentiate between instructions that have different types. TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) { … } // Check that debug records do not impact similarity. They are marked as // invisible. TEST(IRSimilarityCandidate, IdenticalWithDebug) { … } // Checks that IRSimilarityCandidates that include illegal instructions, are not // considered to be the same set of instructions. In these sets of instructions // the allocas are illegal. TEST(IRSimilarityCandidate, IllegalInCandidate) { … } // Checks that different structure, in this case, where we introduce a new // needed input in one region, is recognized as different. TEST(IRSimilarityCandidate, DifferentStructure) { … } // Checks that comparison instructions are found to have the same structure // when the operands are flipped and the predicate is also swapped. TEST(IRSimilarityCandidate, PredicateIsomorphismStructure) { … } // Checks that different predicates are counted as diferent. TEST(IRSimilarityCandidate, PredicateDifference) { … } // Checks that the same structure is recognized between two candidates. The // items %a and %b are used in the same way in both sets of instructions. TEST(IRSimilarityCandidate, SameStructure) { … } // Checks that the canonical numbering between two candidates matches the found // mapping between two candidates. TEST(IRSimilarityCandidate, CanonicalNumbering) { … } // Checks that the same structure is recognized between two candidates. While // the input names are reversed, they still perform the same overall operation. TEST(IRSimilarityCandidate, DifferentNameSameStructure) { … } // Checks that the same structure is recognized between two candidates when // the branches target other blocks inside the same region, the relative // distance between the blocks must be the same. TEST(IRSimilarityCandidate, SameBranchStructureInternal) { … } // Checks that the different structure is recognized between two candidates, // when the branches target other blocks inside the same region, the relative // distance between the blocks must be the same. TEST(IRSimilarityCandidate, DifferentBranchStructureInternal) { … } // Checks that the same structure is recognized between two candidates, when // the branches target other blocks outside region, the relative distance // does not need to be the same. TEST(IRSimilarityCandidate, SameBranchStructureOutside) { … } // Checks that the same structure is recognized between two candidates, when // the branches target other blocks outside region, the relative distance // does not need to be the same. TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) { … } // Checks that the same structure is recognized between two candidates, // when the phi predecessor are other blocks inside the same region, // the relative distance between the blocks must be the same. TEST(IRSimilarityCandidate, SamePHIStructureInternal) { … } // Checks that the different structure is recognized between two candidates, // when the phi predecessor are other blocks inside the same region, // the relative distance between the blocks must be the same. TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) { … } // Checks that two sets of identical instructions are found to be the same. // Both sequences of adds have the same operand ordering, and the same // instructions, making them strcturally equivalent. TEST(IRSimilarityIdentifier, IdentitySimilarity) { … } // Checks that incorrect sequences are not found as similar. In this case, // we have different sequences of instructions. TEST(IRSimilarityIdentifier, InstructionDifference) { … } // This test checks to see whether we can detect similarity for commutative // instructions where the operands have been reversed. TEST(IRSimilarityIdentifier, CommutativeSimilarity) { … } // This test ensures that when the first instruction in a sequence is // a commutative instruction with the same value (mcomm_inst_same_val), but the // corresponding instruction (comm_inst_diff_val) is not, we mark the regions // and not similar. TEST(IRSimilarityIdentifier, CommutativeSameValueFirstMisMatch) { … } // This test makes sure that intrinsic functions that are marked commutative // are still treated as non-commutative since they are function calls. TEST(IRSimilarityIdentifier, IntrinsicCommutative) { … } // This test checks to see whether we can detect different structure in // commutative instructions. In this case, the second operand in the second // add is different. TEST(IRSimilarityIdentifier, NoCommutativeSimilarity) { … } // Check that we are not finding similarity in non commutative // instructions. That is, while the instruction and operands used are the same // in the two subtraction sequences, they are in a different order, and cannot // be counted as the same since a subtraction is not commutative. TEST(IRSimilarityIdentifier, NonCommutativeDifference) { … } // Check that we find similarity despite changing the register names. TEST(IRSimilarityIdentifier, MappingSimilarity) { … } // Check that we find instances of swapped predicate isomorphism. That is, // for predicates that can be flipped, e.g. greater than to less than, // we can identify that instances of these different literal predicates, but are // the same within a single swap can be found. TEST(IRSimilarityIdentifier, PredicateIsomorphism) { … } // Checks that constants are detected as the same operand in each use in the // sequences of instructions. Also checks that we can find structural // equivalence using constants. In this case the 1 has the same use pattern as // %a. TEST(IRSimilarityIdentifier, ConstantMappingSimilarity) { … } // Check that constants are uniquely identified. i.e. two different constants // are not considered the same. This means that this should not find any // structural similarity. TEST(IRSimilarityIdentifier, ConstantMappingDifference) { … }