; llvm/llvm/test/CodeGen/PowerPC/aix-tls-le-ldst-float.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s \
; RUN:      --check-prefix=SMALL64
; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large < %s \
; RUN:      | FileCheck %s --check-prefix=LARGE64
; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:      -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s \
; RUN:      --check-prefix=SMALL32
; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
; RUN:      -mtriple powerpc-ibm-aix-xcoff --code-model=large < %s \
; RUN:      | FileCheck %s --check-prefix=LARGE32

; Globals exercised by the functions below. Every thread-local variable uses
; the local-exec TLS model. The names encode the two axes being varied:
;   - a leading "I" marks internal linkage, no prefix marks external linkage;
;   - "Uninit" variables are zero-initialized, "Init" variables carry an
;     explicit non-zero initializer.
; @VarInit is an ordinary (non thread-local) global that the load*2 functions
; add to a thread-local value.
@ThreadLocalVarInit = thread_local(localexec) global float 0x401D333340000000, align 4
@VarInit = global float 0x4021666660000000, align 4
@IThreadLocalVarUninit = internal thread_local(localexec) global float 0.000000e+00, align 4
@IThreadLocalVarInit = internal thread_local(localexec) global float 0x4018CCCCC0000000, align 4
@ThreadLocalVarUninit = thread_local(localexec) global float 0.000000e+00, align 4
; Intrinsic that every function below calls to obtain the address of a
; thread-local variable before loading from or storing to it.
; NOTE(review): attribute group #1 is referenced here but no definition of it
; is visible in this file - confirm that this is intentional.
declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull) #1

; Store the float argument %x into the internal, zero-initialized local-exec
; thread-local variable @IThreadLocalVarUninit.
; Expected 64-bit code: load the variable's TOC entry (split into an
; addis/ld pair for the large code model) and issue stfsx indexed off r13.
; Expected 32-bit code: set up a stack frame, call .__get_tpointer, and issue
; stfsx indexed off r3 with the TOC-loaded value in r4.
define void @storeITLUninit(float noundef %x) {
; SMALL64-LABEL: storeITLUninit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL64-NEXT:    stfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: storeITLUninit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C0@u(r2)
; LARGE64-NEXT:    ld r3, L..C0@l(r3)
; LARGE64-NEXT:    stfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: storeITLUninit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    stfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: storeITLUninit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C0@u(r2)
; LARGE32-NEXT:    lwz r4, L..C0@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    stfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
  store float %x, ptr %0, align 4
  ret void
}

; Store the float argument %x into the internal, explicitly initialized
; local-exec thread-local variable @IThreadLocalVarInit.
; The expected code has the same shape as for the zero-initialized internal
; variable; only the TOC entry referenced (L..C1) differs.
define void @storeITLInit(float noundef %x) {
; SMALL64-LABEL: storeITLInit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL64-NEXT:    stfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: storeITLInit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C1@u(r2)
; LARGE64-NEXT:    ld r3, L..C1@l(r3)
; LARGE64-NEXT:    stfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: storeITLInit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    stfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: storeITLInit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C1@u(r2)
; LARGE32-NEXT:    lwz r4, L..C1@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    stfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
  store float %x, ptr %0, align 4
  ret void
}

; Store the float argument %x into the externally visible, zero-initialized
; local-exec thread-local variable @ThreadLocalVarUninit.
; The expected code matches the internal-linkage store tests except that it
; goes through TOC entry L..C2.
define void @storeTLUninit(float noundef %x) {
; SMALL64-LABEL: storeTLUninit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL64-NEXT:    stfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: storeTLUninit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C2@u(r2)
; LARGE64-NEXT:    ld r3, L..C2@l(r3)
; LARGE64-NEXT:    stfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: storeTLUninit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    stfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: storeTLUninit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C2@u(r2)
; LARGE32-NEXT:    lwz r4, L..C2@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    stfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
  store float %x, ptr %0, align 4
  ret void
}

; Store the float argument %x into the externally visible, explicitly
; initialized local-exec thread-local variable @ThreadLocalVarInit.
; The expected code matches the other store tests except that it goes through
; TOC entry L..C3.
define void @storeTLInit(float noundef %x) {
; SMALL64-LABEL: storeTLInit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL64-NEXT:    stfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: storeTLInit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C3@u(r2)
; LARGE64-NEXT:    ld r3, L..C3@l(r3)
; LARGE64-NEXT:    stfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: storeTLInit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    stfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: storeTLInit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C3@u(r2)
; LARGE32-NEXT:    lwz r4, L..C3@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    stfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
  store float %x, ptr %0, align 4
  ret void
}

; Load and return the float held in the internal, zero-initialized local-exec
; thread-local variable @IThreadLocalVarUninit.
; Expected 64-bit code: load the variable's TOC entry (split into an
; addis/ld pair for the large code model) and issue lfsx indexed off r13.
; Expected 32-bit code: set up a stack frame, call .__get_tpointer, and issue
; lfsx indexed off r3 with the TOC-loaded value in r4.
define float @loadITLUninit() {
; SMALL64-LABEL: loadITLUninit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL64-NEXT:    lfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadITLUninit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C0@u(r2)
; LARGE64-NEXT:    ld r3, L..C0@l(r3)
; LARGE64-NEXT:    lfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadITLUninit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadITLUninit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C0@u(r2)
; LARGE32-NEXT:    lwz r4, L..C0@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
  %1 = load float, ptr %0, align 4
  ret float %1
}

; Load the internal, zero-initialized thread-local @IThreadLocalVarUninit,
; load the ordinary global @VarInit, and return their sum.
; The expected code performs the thread-local load into f0 first, then loads
; @VarInit through its own TOC entry (L..C4) with a plain lfs, and combines
; the two values with fadds.
define float @loadITLUninit2() {
; SMALL64-LABEL: loadITLUninit2:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL64-NEXT:    lfsx f0, r13, r3
; SMALL64-NEXT:    ld r3, L..C4(r2) # @VarInit
; SMALL64-NEXT:    lfs f1, 0(r3)
; SMALL64-NEXT:    fadds f1, f0, f1
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadITLUninit2:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C0@u(r2)
; LARGE64-NEXT:    ld r3, L..C0@l(r3)
; LARGE64-NEXT:    lfsx f0, r13, r3
; LARGE64-NEXT:    addis r3, L..C4@u(r2)
; LARGE64-NEXT:    ld r3, L..C4@l(r3)
; LARGE64-NEXT:    lfs f1, 0(r3)
; LARGE64-NEXT:    fadds f1, f0, f1
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadITLUninit2:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C0(r2) # target-flags(ppc-tprel) @IThreadLocalVarUninit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f0, r3, r4
; SMALL32-NEXT:    lwz r3, L..C4(r2) # @VarInit
; SMALL32-NEXT:    lfs f1, 0(r3)
; SMALL32-NEXT:    fadds f1, f0, f1
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadITLUninit2:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C0@u(r2)
; LARGE32-NEXT:    lwz r4, L..C0@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f0, r3, r4
; LARGE32-NEXT:    addis r3, L..C4@u(r2)
; LARGE32-NEXT:    lwz r3, L..C4@l(r3)
; LARGE32-NEXT:    lfs f1, 0(r3)
; LARGE32-NEXT:    fadds f1, f0, f1
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarUninit)
  %1 = load float, ptr %0, align 4
  %2 = load float, ptr @VarInit, align 4
  %add = fadd float %1, %2
  ret float %add
}

; Load and return the float held in the internal, explicitly initialized
; local-exec thread-local variable @IThreadLocalVarInit.
; The expected code has the same shape as the zero-initialized internal load;
; only the TOC entry referenced (L..C1) differs.
define float @loadITLInit() {
; SMALL64-LABEL: loadITLInit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL64-NEXT:    lfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadITLInit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C1@u(r2)
; LARGE64-NEXT:    ld r3, L..C1@l(r3)
; LARGE64-NEXT:    lfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadITLInit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadITLInit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C1@u(r2)
; LARGE32-NEXT:    lwz r4, L..C1@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
  %1 = load float, ptr %0, align 4
  ret float %1
}

; Load the internal, explicitly initialized thread-local @IThreadLocalVarInit,
; load the ordinary global @VarInit, and return their sum.
; The expected code performs the thread-local load into f0 (via TOC entry
; L..C1), loads @VarInit through TOC entry L..C4 with a plain lfs, and
; combines the two values with fadds.
define float @loadITLInit2() {
; SMALL64-LABEL: loadITLInit2:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL64-NEXT:    lfsx f0, r13, r3
; SMALL64-NEXT:    ld r3, L..C4(r2) # @VarInit
; SMALL64-NEXT:    lfs f1, 0(r3)
; SMALL64-NEXT:    fadds f1, f0, f1
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadITLInit2:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C1@u(r2)
; LARGE64-NEXT:    ld r3, L..C1@l(r3)
; LARGE64-NEXT:    lfsx f0, r13, r3
; LARGE64-NEXT:    addis r3, L..C4@u(r2)
; LARGE64-NEXT:    ld r3, L..C4@l(r3)
; LARGE64-NEXT:    lfs f1, 0(r3)
; LARGE64-NEXT:    fadds f1, f0, f1
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadITLInit2:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C1(r2) # target-flags(ppc-tprel) @IThreadLocalVarInit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f0, r3, r4
; SMALL32-NEXT:    lwz r3, L..C4(r2) # @VarInit
; SMALL32-NEXT:    lfs f1, 0(r3)
; SMALL32-NEXT:    fadds f1, f0, f1
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadITLInit2:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C1@u(r2)
; LARGE32-NEXT:    lwz r4, L..C1@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f0, r3, r4
; LARGE32-NEXT:    addis r3, L..C4@u(r2)
; LARGE32-NEXT:    lwz r3, L..C4@l(r3)
; LARGE32-NEXT:    lfs f1, 0(r3)
; LARGE32-NEXT:    fadds f1, f0, f1
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @IThreadLocalVarInit)
  %1 = load float, ptr %0, align 4
  %2 = load float, ptr @VarInit, align 4
  %add = fadd float %1, %2
  ret float %add
}

; Load and return the float held in the externally visible, zero-initialized
; local-exec thread-local variable @ThreadLocalVarUninit.
; The expected code matches the internal-linkage load tests except that it
; goes through TOC entry L..C2.
define float @loadTLUninit() {
; SMALL64-LABEL: loadTLUninit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL64-NEXT:    lfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadTLUninit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C2@u(r2)
; LARGE64-NEXT:    ld r3, L..C2@l(r3)
; LARGE64-NEXT:    lfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadTLUninit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadTLUninit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C2@u(r2)
; LARGE32-NEXT:    lwz r4, L..C2@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
  %1 = load float, ptr %0, align 4
  ret float %1
}

; Load the externally visible, zero-initialized thread-local
; @ThreadLocalVarUninit, load the ordinary global @VarInit, and return their
; sum.
; The expected code performs the thread-local load into f0 (via TOC entry
; L..C2), loads @VarInit through TOC entry L..C4 with a plain lfs, and
; combines the two values with fadds.
define float @loadTLUninit2() {
; SMALL64-LABEL: loadTLUninit2:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL64-NEXT:    lfsx f0, r13, r3
; SMALL64-NEXT:    ld r3, L..C4(r2) # @VarInit
; SMALL64-NEXT:    lfs f1, 0(r3)
; SMALL64-NEXT:    fadds f1, f0, f1
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadTLUninit2:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C2@u(r2)
; LARGE64-NEXT:    ld r3, L..C2@l(r3)
; LARGE64-NEXT:    lfsx f0, r13, r3
; LARGE64-NEXT:    addis r3, L..C4@u(r2)
; LARGE64-NEXT:    ld r3, L..C4@l(r3)
; LARGE64-NEXT:    lfs f1, 0(r3)
; LARGE64-NEXT:    fadds f1, f0, f1
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadTLUninit2:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C2(r2) # target-flags(ppc-tprel) @ThreadLocalVarUninit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f0, r3, r4
; SMALL32-NEXT:    lwz r3, L..C4(r2) # @VarInit
; SMALL32-NEXT:    lfs f1, 0(r3)
; SMALL32-NEXT:    fadds f1, f0, f1
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadTLUninit2:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C2@u(r2)
; LARGE32-NEXT:    lwz r4, L..C2@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f0, r3, r4
; LARGE32-NEXT:    addis r3, L..C4@u(r2)
; LARGE32-NEXT:    lwz r3, L..C4@l(r3)
; LARGE32-NEXT:    lfs f1, 0(r3)
; LARGE32-NEXT:    fadds f1, f0, f1
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarUninit)
  %1 = load float, ptr %0, align 4
  %2 = load float, ptr @VarInit, align 4
  %add = fadd float %1, %2
  ret float %add
}

; Load and return the float held in the externally visible, explicitly
; initialized local-exec thread-local variable @ThreadLocalVarInit.
; The expected code matches the other single-load tests except that it goes
; through TOC entry L..C3.
define float @loadTLInit() {
; SMALL64-LABEL: loadTLInit:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL64-NEXT:    lfsx f1, r13, r3
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadTLInit:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C3@u(r2)
; LARGE64-NEXT:    ld r3, L..C3@l(r3)
; LARGE64-NEXT:    lfsx f1, r13, r3
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadTLInit:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f1, r3, r4
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadTLInit:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C3@u(r2)
; LARGE32-NEXT:    lwz r4, L..C3@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f1, r3, r4
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
  %1 = load float, ptr %0, align 4
  ret float %1
}

; Load the externally visible, explicitly initialized thread-local
; @ThreadLocalVarInit, load the ordinary global @VarInit, and return their
; sum.
; The expected code performs the thread-local load into f0 (via TOC entry
; L..C3), loads @VarInit through TOC entry L..C4 with a plain lfs, and
; combines the two values with fadds.
define float @loadTLInit2() {
; SMALL64-LABEL: loadTLInit2:
; SMALL64:       # %bb.0: # %entry
; SMALL64-NEXT:    ld r3, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL64-NEXT:    lfsx f0, r13, r3
; SMALL64-NEXT:    ld r3, L..C4(r2) # @VarInit
; SMALL64-NEXT:    lfs f1, 0(r3)
; SMALL64-NEXT:    fadds f1, f0, f1
; SMALL64-NEXT:    blr
;
; LARGE64-LABEL: loadTLInit2:
; LARGE64:       # %bb.0: # %entry
; LARGE64-NEXT:    addis r3, L..C3@u(r2)
; LARGE64-NEXT:    ld r3, L..C3@l(r3)
; LARGE64-NEXT:    lfsx f0, r13, r3
; LARGE64-NEXT:    addis r3, L..C4@u(r2)
; LARGE64-NEXT:    ld r3, L..C4@l(r3)
; LARGE64-NEXT:    lfs f1, 0(r3)
; LARGE64-NEXT:    fadds f1, f0, f1
; LARGE64-NEXT:    blr
;
; SMALL32-LABEL: loadTLInit2:
; SMALL32:       # %bb.0: # %entry
; SMALL32-NEXT:    mflr r0
; SMALL32-NEXT:    stwu r1, -32(r1)
; SMALL32-NEXT:    lwz r4, L..C3(r2) # target-flags(ppc-tprel) @ThreadLocalVarInit
; SMALL32-NEXT:    stw r0, 40(r1)
; SMALL32-NEXT:    bla .__get_tpointer[PR]
; SMALL32-NEXT:    lfsx f0, r3, r4
; SMALL32-NEXT:    lwz r3, L..C4(r2) # @VarInit
; SMALL32-NEXT:    lfs f1, 0(r3)
; SMALL32-NEXT:    fadds f1, f0, f1
; SMALL32-NEXT:    addi r1, r1, 32
; SMALL32-NEXT:    lwz r0, 8(r1)
; SMALL32-NEXT:    mtlr r0
; SMALL32-NEXT:    blr
;
; LARGE32-LABEL: loadTLInit2:
; LARGE32:       # %bb.0: # %entry
; LARGE32-NEXT:    mflr r0
; LARGE32-NEXT:    stwu r1, -32(r1)
; LARGE32-NEXT:    stw r0, 40(r1)
; LARGE32-NEXT:    addis r3, L..C3@u(r2)
; LARGE32-NEXT:    lwz r4, L..C3@l(r3)
; LARGE32-NEXT:    bla .__get_tpointer[PR]
; LARGE32-NEXT:    lfsx f0, r3, r4
; LARGE32-NEXT:    addis r3, L..C4@u(r2)
; LARGE32-NEXT:    lwz r3, L..C4@l(r3)
; LARGE32-NEXT:    lfs f1, 0(r3)
; LARGE32-NEXT:    fadds f1, f0, f1
; LARGE32-NEXT:    addi r1, r1, 32
; LARGE32-NEXT:    lwz r0, 8(r1)
; LARGE32-NEXT:    mtlr r0
; LARGE32-NEXT:    blr
entry:
  %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @ThreadLocalVarInit)
  %1 = load float, ptr %0, align 4
  %2 = load float, ptr @VarInit, align 4
  %add = fadd float %1, %2
  ret float %add
}

; TOC Entry Checks.

; SMALL64-LABEL: .toc
; SMALL64-LABEL: L..C0:
; SMALL64-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
; SMALL64-LABEL: L..C1:
; SMALL64-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
; SMALL64-LABEL: L..C2:
; SMALL64-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
; SMALL64-LABEL: L..C3:
; SMALL64-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
; SMALL64-LABEL: L..C4:
; SMALL64-NEXT: .tc VarInit[TC],VarInit[RW]

; LARGE64-LABEL: .toc
; LARGE64-LABEL: L..C0:
; LARGE64-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
; LARGE64-LABEL: L..C1:
; LARGE64-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C2:
; LARGE64-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
; LARGE64-LABEL: L..C3:
; LARGE64-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE64-LABEL: L..C4:
; LARGE64-NEXT: .tc VarInit[TE],VarInit[RW]

; SMALL32-LABEL: .toc
; SMALL32-LABEL: L..C0:
; SMALL32-NEXT: .tc IThreadLocalVarUninit[TC],IThreadLocalVarUninit[UL]@le
; SMALL32-LABEL: L..C1:
; SMALL32-NEXT: .tc IThreadLocalVarInit[TC],IThreadLocalVarInit[TL]@le
; SMALL32-LABEL: L..C2:
; SMALL32-NEXT: .tc ThreadLocalVarUninit[TC],ThreadLocalVarUninit[TL]@le
; SMALL32-LABEL: L..C3:
; SMALL32-NEXT: .tc ThreadLocalVarInit[TC],ThreadLocalVarInit[TL]@le
; SMALL32-LABEL: L..C4:
; SMALL32-NEXT: .tc VarInit[TC],VarInit[RW]

; LARGE32-LABEL: .toc
; LARGE32-LABEL: L..C0:
; LARGE32-NEXT: .tc IThreadLocalVarUninit[TE],IThreadLocalVarUninit[UL]@le
; LARGE32-LABEL: L..C1:
; LARGE32-NEXT: .tc IThreadLocalVarInit[TE],IThreadLocalVarInit[TL]@le
; LARGE32-LABEL: L..C2:
; LARGE32-NEXT: .tc ThreadLocalVarUninit[TE],ThreadLocalVarUninit[TL]@le
; LARGE32-LABEL: L..C3:
; LARGE32-NEXT: .tc ThreadLocalVarInit[TE],ThreadLocalVarInit[TL]@le
; LARGE32-LABEL: L..C4:
; LARGE32-NEXT: .tc VarInit[TE],VarInit[RW]