//===- AArch64ErrataFix.cpp -----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file implements Section Patching for the purpose of working around // the AArch64 Cortex-53 errata 843419 that affects r0p0, r0p1, r0p2 and r0p4 // versions of the core. // // The general principle is that an erratum sequence of one or // more instructions is detected in the instruction stream, one of the // instructions in the sequence is replaced with a branch to a patch sequence // of replacement instructions. At the end of the replacement sequence the // patch branches back to the instruction stream. // This technique is only suitable for fixing an erratum when: // - There is a set of necessary conditions required to trigger the erratum that // can be detected at static link time. // - There is a set of replacement instructions that can be used to remove at // least one of the necessary conditions that trigger the erratum. // - We can overwrite an instruction in the erratum sequence with a branch to // the replacement sequence. // - We can place the replacement sequence within range of the branch. //===----------------------------------------------------------------------===// #include "AArch64ErrataFix.h" #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" #include "Relocations.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Strings.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Endian.h" #include <algorithm> usingnamespacellvm; usingnamespacellvm::ELF; usingnamespacellvm::object; usingnamespacellvm::support; usingnamespacellvm::support::endian; usingnamespacelld; usingnamespacelld::elf; // Helper functions to identify instructions and conditions needed to trigger // the Cortex-A53-843419 erratum. // ADRP // | 1 | immlo (2) | 1 | 0 0 0 0 | immhi (19) | Rd (5) | static bool isADRP(uint32_t instr) { … } // Load and store bit patterns from ARMv8-A. // Instructions appear in order of appearance starting from table in // C4.1.3 Loads and Stores. // All loads and stores have 1 (at bit position 27), (0 at bit position 25). // | op0 x op1 (2) | 1 op2 0 op3 (2) | x | op4 (5) | xxxx | op5 (2) | x (10) | static bool isLoadStoreClass(uint32_t instr) { … } // LDN/STN multiple no offset // | 0 Q 00 | 1100 | 0 L 00 | 0000 | opcode (4) | size (2) | Rn (5) | Rt (5) | // LDN/STN multiple post-indexed // | 0 Q 00 | 1100 | 1 L 0 | Rm (5)| opcode (4) | size (2) | Rn (5) | Rt (5) | // L == 0 for stores. // Utility routine to decode opcode field of LDN/STN multiple structure // instructions to find the ST1 instructions. // opcode == 0010 ST1 4 registers. // opcode == 0110 ST1 3 registers. // opcode == 0111 ST1 1 register. // opcode == 1010 ST1 2 registers. static bool isST1MultipleOpcode(uint32_t instr) { … } static bool isST1Multiple(uint32_t instr) { … } // Writes to Rn (writeback). static bool isST1MultiplePost(uint32_t instr) { … } // LDN/STN single no offset // | 0 Q 00 | 1101 | 0 L R 0 | 0000 | opc (3) S | size (2) | Rn (5) | Rt (5)| // LDN/STN single post-indexed // | 0 Q 00 | 1101 | 1 L R | Rm (5) | opc (3) S | size (2) | Rn (5) | Rt (5)| // L == 0 for stores // Utility routine to decode opcode field of LDN/STN single structure // instructions to find the ST1 instructions. // R == 0 for ST1 and ST3, R == 1 for ST2 and ST4. // opcode == 000 ST1 8-bit. // opcode == 010 ST1 16-bit. // opcode == 100 ST1 32 or 64-bit (Size determines which). static bool isST1SingleOpcode(uint32_t instr) { … } static bool isST1Single(uint32_t instr) { … } // Writes to Rn (writeback). static bool isST1SinglePost(uint32_t instr) { … } static bool isST1(uint32_t instr) { … } // Load/store exclusive // | size (2) 00 | 1000 | o2 L o1 | Rs (5) | o0 | Rt2 (5) | Rn (5) | Rt (5) | // L == 0 for Stores. static bool isLoadStoreExclusive(uint32_t instr) { … } static bool isLoadExclusive(uint32_t instr) { … } // Load register literal // | opc (2) 01 | 1 V 00 | imm19 | Rt (5) | static bool isLoadLiteral(uint32_t instr) { … } // Load/store no-allocate pair // (offset) // | opc (2) 10 | 1 V 00 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | // L == 0 for stores. // Never writes to register static bool isSTNP(uint32_t instr) { … } // Load/store register pair // (post-indexed) // | opc (2) 10 | 1 V 00 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | // L == 0 for stores, V == 0 for Scalar, V == 1 for Simd/FP // Writes to Rn. static bool isSTPPost(uint32_t instr) { … } // (offset) // | opc (2) 10 | 1 V 01 | 0 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | static bool isSTPOffset(uint32_t instr) { … } // (pre-index) // | opc (2) 10 | 1 V 01 | 1 L | imm7 | Rt2 (5) | Rn (5) | Rt (5) | // Writes to Rn. static bool isSTPPre(uint32_t instr) { … } static bool isSTP(uint32_t instr) { … } // Load/store register (unscaled immediate) // | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 00 | Rn (5) | Rt (5) | // V == 0 for Scalar, V == 1 for Simd/FP. static bool isLoadStoreUnscaled(uint32_t instr) { … } // Load/store register (immediate post-indexed) // | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 01 | Rn (5) | Rt (5) | static bool isLoadStoreImmediatePost(uint32_t instr) { … } // Load/store register (unprivileged) // | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 10 | Rn (5) | Rt (5) | static bool isLoadStoreUnpriv(uint32_t instr) { … } // Load/store register (immediate pre-indexed) // | size (2) 11 | 1 V 00 | opc (2) 0 | imm9 | 11 | Rn (5) | Rt (5) | static bool isLoadStoreImmediatePre(uint32_t instr) { … } // Load/store register (register offset) // | size (2) 11 | 1 V 00 | opc (2) 1 | Rm (5) | option (3) S | 10 | Rn | Rt | static bool isLoadStoreRegisterOff(uint32_t instr) { … } // Load/store register (unsigned immediate) // | size (2) 11 | 1 V 01 | opc (2) | imm12 | Rn (5) | Rt (5) | static bool isLoadStoreRegisterUnsigned(uint32_t instr) { … } // Rt is always in bit position 0 - 4. static uint32_t getRt(uint32_t instr) { … } // Rn is always in bit position 5 - 9. static uint32_t getRn(uint32_t instr) { … } // C4.1.2 Branches, Exception Generating and System instructions // | op0 (3) 1 | 01 op1 (4) | x (22) | // op0 == 010 101 op1 == 0xxx Conditional Branch. // op0 == 110 101 op1 == 1xxx Unconditional Branch Register. // op0 == x00 101 op1 == xxxx Unconditional Branch immediate. // op0 == x01 101 op1 == 0xxx Compare and branch immediate. // op0 == x01 101 op1 == 1xxx Test and branch immediate. static bool isBranch(uint32_t instr) { … } static bool isV8SingleRegisterNonStructureLoadStore(uint32_t instr) { … } // Note that this function refers to v8.0 only and does not include the // additional load and store instructions added for in later revisions of // the architecture such as the Atomic memory operations introduced // in v8.1. static bool isV8NonStructureLoad(uint32_t instr) { … } // The following decode instructions are only complete up to the instructions // needed for errata 843419. // Instruction with writeback updates the index register after the load/store. static bool hasWriteback(uint32_t instr) { … } // For the load and store class of instructions, a load can write to the // destination register, a load and a store can write to the base register when // the instruction has writeback. static bool doesLoadStoreWriteToReg(uint32_t instr, uint32_t reg) { … } // Scanner for Cortex-A53 errata 843419 // Full details are available in the Cortex A53 MPCore revision 0 Software // Developers Errata Notice (ARM-EPM-048406). // // The instruction sequence that triggers the erratum is common in compiled // AArch64 code, however it is sensitive to the offset of the sequence within // a 4k page. This means that by scanning and fixing the patch after we have // assigned addresses we only need to disassemble and fix instances of the // sequence in the range of affected offsets. // // In summary the erratum conditions are a series of 4 instructions: // 1.) An ADRP instruction that writes to register Rn with low 12 bits of // address of instruction either 0xff8 or 0xffc. // 2.) A load or store instruction that can be: // - A single register load or store, of either integer or vector registers. // - An STP or STNP, of either integer or vector registers. // - An Advanced SIMD ST1 store instruction. // - Must not write to Rn, but may optionally read from it. // 3.) An optional instruction that is not a branch and does not write to Rn. // 4.) A load or store from the Load/store register (unsigned immediate) class // that uses Rn as the base address register. // // Note that we do not attempt to scan for Sequence 2 as described in the // Software Developers Errata Notice as this has been assessed to be extremely // unlikely to occur in compiled code. This matches gold and ld.bfd behavior. // Return true if the Instruction sequence Adrp, Instr2, and Instr4 match // the erratum sequence. The Adrp, Instr2 and Instr4 correspond to 1.), 2.), // and 4.) in the Scanner for Cortex-A53 errata comment above. static bool is843419ErratumSequence(uint32_t instr1, uint32_t instr2, uint32_t instr4) { … } // Scan the instruction sequence starting at Offset Off from the base of // InputSection isec. We update Off in this function rather than in the caller // as we can skip ahead much further into the section when we know how many // instructions we've scanned. // Return the offset of the load or store instruction in isec that we want to // patch or 0 if no patch required. static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off, uint64_t limit) { … } class elf::Patch843419Section final : public SyntheticSection { … }; Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off) : … { … } uint64_t Patch843419Section::getLDSTAddr() const { … } void Patch843419Section::writeTo(uint8_t *buf) { … } void AArch64Err843419Patcher::init() { … } // Insert the PatchSections we have created back into the // InputSectionDescription. As inserting patches alters the addresses of // InputSections that follow them, we try and place the patches after all the // executable sections, although we may need to insert them earlier if the // InputSectionDescription is larger than the maximum branch range. void AArch64Err843419Patcher::insertPatches( InputSectionDescription &isd, std::vector<Patch843419Section *> &patches) { … } // Given an erratum sequence that starts at address adrpAddr, with an // instruction that we need to patch at patcheeOffset from the start of // InputSection isec, create a Patch843419 Section and add it to the // Patches that we need to insert. static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset, InputSection *isec, std::vector<Patch843419Section *> &patches) { … } // Scan all the instructions in InputSectionDescription, for each instance of // the erratum sequence create a Patch843419Section. We return the list of // Patch843419Sections that need to be applied to the InputSectionDescription. std::vector<Patch843419Section *> AArch64Err843419Patcher::patchInputSectionDescription( InputSectionDescription &isd) { … } // For each InputSectionDescription make one pass over the executable sections // looking for the erratum sequence; creating a synthetic Patch843419Section // for each instance found. We insert these synthetic patch sections after the // executable code in each InputSectionDescription. // // PreConditions: // The Output and Input Sections have had their final addresses assigned. // // PostConditions: // Returns true if at least one patch was added. The addresses of the // Output and Input Sections may have been changed. // Returns false if no patches were required and no changes were made. bool AArch64Err843419Patcher::createFixes() { … }