; llvm/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -enable-subreg-liveness \
; RUN:   < %s | FileCheck --check-prefix=P8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
; RUN:   -ppc-asm-full-reg-names \
; RUN:   -enable-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
; RUN:   --check-prefix=LE-PWR8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
; RUN:   --check-prefix=LE-PWR8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \
; RUN:   --check-prefix=AIX64-PWR8 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
; RUN:   -ppc-asm-full-reg-names -enable-subreg-liveness < %s \
; RUN: | FileCheck --check-prefix=PPC-PWR8 %s

; Unordered atomic load of a 16-byte-aligned i128 from %src.
; Expected lowering recorded by the check lines in this function:
;   - P8, LE-PWR8 and AIX64-PWR8 expect one inline `lq` followed by `mr`
;     copies into r3/r4 before `blr`.
;   - PWR7 expects a call to __atomic_load_16 with 0 in r4.
;   - PPC-PWR8 (the 32-bit powerpc triple) expects a call to __atomic_load
;     with 16 in r3, a buffer address 16(r1) in r5 and 0 in r6; the result is
;     then reloaded from 16(r1)..28(r1) into r3-r6.
define dso_local i128 @lq_unordered(ptr %src) {
; P8-LABEL: lq_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    mr r3, r4
; P8-NEXT:    mr r4, r5
; P8-NEXT:    blr
;
; PWR7-LABEL: lq_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    li r4, 0
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lq_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    lq r4, 0(r3)
; LE-PWR8-NEXT:    mr r3, r5
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lq_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lq_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    mr r4, r3
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; The access is the full 128 bits with an explicit 16-byte alignment.
  %0 = load atomic i128, ptr %src unordered, align 16
  ret i128 %0
}

; Unordered atomic i128 load from element %idx of the i128 array at %src.
; In every configuration the check lines expect the address to be computed
; explicitly (shift the index left by 4, then add to the base) before the
; `lq` or the libcall; `lq` itself is always expected with displacement 0.
; The LE-PWR8 checks additionally expect `lq` to target r2, with r2 spilled
; to and reloaded from -8(r1) around it.
define dso_local i128 @lqx_unordered(ptr %src, i64 %idx) {
; P8-LABEL: lqx_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    sldi r4, r4, 4
; P8-NEXT:    add r3, r3, r4
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    mr r3, r4
; P8-NEXT:    mr r4, r5
; P8-NEXT:    blr
;
; PWR7-LABEL: lqx_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    sldi r4, r4, 4
; PWR7-NEXT:    add r3, r3, r4
; PWR7-NEXT:    li r4, 0
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lqx_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    sldi r4, r4, 4
; LE-PWR8-NEXT:    std r2, -8(r1) # 8-byte Folded Spill
; LE-PWR8-NEXT:    add r4, r3, r4
; LE-PWR8-NEXT:    lq r2, 0(r4)
; LE-PWR8-NEXT:    mr r4, r2
; LE-PWR8-NEXT:    ld r2, -8(r1) # 8-byte Folded Reload
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lqx_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    sldi r4, r4, 4
; AIX64-PWR8-NEXT:    add r3, r3, r4
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lqx_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    slwi r4, r6, 4
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    add r4, r3, r4
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; Indexing in units of i128 scales %idx by 16 bytes, which is the
  ; shift-by-4 seen in the expected output.
  %0 = getelementptr i128, ptr %src, i64 %idx
  %1 = load atomic i128, ptr %0 unordered, align 16
  ret i128 %1
}

; Unordered atomic i128 load from a large constant offset off %src.
; Element 131072 of an i128 array is byte offset 131072 * 16 = 2097152, i.e.
; 32 * 65536. The check lines expect that constant to be materialised with
; `lis rN, 32` + `add` (lq configurations) or folded into `addis ..., 32`
; (libcall configurations); `lq` is still expected with displacement 0.
define dso_local i128 @lq_big_offset_unordered(ptr %src) {
; P8-LABEL: lq_big_offset_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lis r4, 32
; P8-NEXT:    add r3, r3, r4
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    mr r3, r4
; P8-NEXT:    mr r4, r5
; P8-NEXT:    blr
;
; PWR7-LABEL: lq_big_offset_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    addis r3, r3, 32
; PWR7-NEXT:    li r4, 0
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lq_big_offset_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    lis r4, 32
; LE-PWR8-NEXT:    std r2, -8(r1) # 8-byte Folded Spill
; LE-PWR8-NEXT:    add r4, r3, r4
; LE-PWR8-NEXT:    lq r2, 0(r4)
; LE-PWR8-NEXT:    mr r4, r2
; LE-PWR8-NEXT:    ld r2, -8(r1) # 8-byte Folded Reload
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lq_big_offset_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    lis r4, 32
; AIX64-PWR8-NEXT:    add r3, r3, r4
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lq_big_offset_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    addis r4, r3, 32
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; 131072 elements * 16 bytes = 0x200000, the `32` in the high-half
  ; immediates above.
  %0 = getelementptr i128, ptr %src, i64 131072
  %1 = load atomic i128, ptr %0 unordered, align 16
  ret i128 %1
}

; Monotonic atomic load of a 16-byte-aligned i128 from %src.
; The expected instruction sequences are the same as for lq_unordered: an
; inline `lq` with no barrier on the P8, LE-PWR8 and AIX64-PWR8 prefixes, and
; libcalls (__atomic_load_16 / __atomic_load) that receive 0 as their last
; argument on the PWR7 and PPC-PWR8 prefixes.
define dso_local i128 @lq_monotonic(ptr %src) {
; P8-LABEL: lq_monotonic:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    mr r3, r4
; P8-NEXT:    mr r4, r5
; P8-NEXT:    blr
;
; PWR7-LABEL: lq_monotonic:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    li r4, 0
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lq_monotonic:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    lq r4, 0(r3)
; LE-PWR8-NEXT:    mr r3, r5
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lq_monotonic:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lq_monotonic:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    mr r4, r3
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; Only the ordering keyword differs from lq_unordered.
  %0 = load atomic i128, ptr %src monotonic, align 16
  ret i128 %0
}

; Acquire atomic load of a 16-byte-aligned i128 from %src.
; On the lq-capable prefixes (P8, LE-PWR8, AIX64-PWR8) the check lines expect
; the `lq` to be followed by `cmpd cr7, r5, r5`, a `bne-` over the next
; instruction, and `isync`. AIX spells the branch target `$+4` where the other
; targets print `.+4`.
; The libcall prefixes expect the same calls as lq_unordered, but with 2
; (instead of 0) in the last argument register; presumably that is the
; memory-order argument -- confirm against the libatomic interface.
define dso_local i128 @lq_acquire(ptr %src) {
; P8-LABEL: lq_acquire:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    cmpd cr7, r5, r5
; P8-NEXT:    mr r3, r4
; P8-NEXT:    mr r4, r5
; P8-NEXT:    bne- cr7, .+4
; P8-NEXT:    isync
; P8-NEXT:    blr
;
; PWR7-LABEL: lq_acquire:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    li r4, 2
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lq_acquire:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    lq r4, 0(r3)
; LE-PWR8-NEXT:    cmpd cr7, r5, r5
; LE-PWR8-NEXT:    mr r3, r5
; LE-PWR8-NEXT:    bne- cr7, .+4
; LE-PWR8-NEXT:    isync
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lq_acquire:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    cmpd cr7, r5, r5
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    bne- cr7, $+4
; AIX64-PWR8-NEXT:    isync
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lq_acquire:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    mr r4, r3
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    li r6, 2
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; The acquire ordering is what adds the compare/branch/isync tail after lq.
  %0 = load atomic i128, ptr %src acquire, align 16
  ret i128 %0
}

; Sequentially consistent atomic load of a 16-byte-aligned i128 from %src.
; On the lq-capable prefixes (P8, LE-PWR8, AIX64-PWR8) the check lines expect
; a leading `sync`, then `lq`, then the same `cmpd` / `bne-` / `isync` tail
; that lq_acquire expects.
; The libcall prefixes expect 5 in the last argument register (0 in
; lq_unordered, 2 in lq_acquire); presumably the memory-order argument --
; confirm against the libatomic interface.
define dso_local i128 @lq_seqcst(ptr %src) {
; P8-LABEL: lq_seqcst:
; P8:       # %bb.0: # %entry
; P8-NEXT:    sync
; P8-NEXT:    lq r4, 0(r3)
; P8-NEXT:    cmpd cr7, r5, r5
; P8-NEXT:    mr r3, r4
; P8-NEXT:    bne- cr7, .+4
; P8-NEXT:    isync
; P8-NEXT:    mr r4, r5
; P8-NEXT:    blr
;
; PWR7-LABEL: lq_seqcst:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    li r4, 5
; PWR7-NEXT:    bl __atomic_load_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: lq_seqcst:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    sync
; LE-PWR8-NEXT:    lq r4, 0(r3)
; LE-PWR8-NEXT:    cmpd cr7, r5, r5
; LE-PWR8-NEXT:    mr r3, r5
; LE-PWR8-NEXT:    bne- cr7, .+4
; LE-PWR8-NEXT:    isync
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: lq_seqcst:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    sync
; AIX64-PWR8-NEXT:    lq r4, 0(r3)
; AIX64-PWR8-NEXT:    cmpd cr7, r5, r5
; AIX64-PWR8-NEXT:    mr r3, r4
; AIX64-PWR8-NEXT:    bne- cr7, $+4
; AIX64-PWR8-NEXT:    isync
; AIX64-PWR8-NEXT:    mr r4, r5
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: lq_seqcst:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    mr r4, r3
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    li r6, 5
; PPC-PWR8-NEXT:    bl __atomic_load
; PPC-PWR8-NEXT:    lwz r6, 28(r1)
; PPC-PWR8-NEXT:    lwz r5, 24(r1)
; PPC-PWR8-NEXT:    lwz r4, 20(r1)
; PPC-PWR8-NEXT:    lwz r3, 16(r1)
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; seq_cst adds the leading `sync` on top of the acquire-style tail.
  %0 = load atomic i128, ptr %src seq_cst, align 16
  ret i128 %0
}

; Unordered atomic store of the i128 %val to the 16-byte-aligned %dst.
; Expected lowering recorded by the check lines in this function:
;   - P8, LE-PWR8 and AIX64-PWR8 expect the two halves of %val to be copied
;     from r3/r4 into r6/r7 and stored with one `stq r6, 0(r5)`. The
;     little-endian checks copy the halves in the opposite order to the
;     big-endian ones.
;   - PWR7 expects a call to __atomic_store_16 with 0 in r6.
;   - PPC-PWR8 (the 32-bit powerpc triple) expects %val to be written to
;     16(r1)..28(r1) and a call to __atomic_store with 16 in r3, the
;     destination in r4, the buffer address in r5 and 0 in r6.
define dso_local void @stq_unordered(i128 %val, ptr %dst) {
; P8-LABEL: stq_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    mr r7, r4
; P8-NEXT:    mr r6, r3
; P8-NEXT:    stq r6, 0(r5)
; P8-NEXT:    blr
;
; PWR7-LABEL: stq_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r6, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    mr r3, r5
; PWR7-NEXT:    mr r5, r6
; PWR7-NEXT:    li r6, 0
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stq_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    mr r7, r3
; LE-PWR8-NEXT:    mr r6, r4
; LE-PWR8-NEXT:    stq r6, 0(r5)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stq_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    mr r7, r4
; AIX64-PWR8-NEXT:    mr r6, r3
; AIX64-PWR8-NEXT:    stq r6, 0(r5)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stq_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    mr r4, r7
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; The store covers the full 128 bits with an explicit 16-byte alignment.
  store atomic i128 %val, ptr %dst unordered, align 16
  ret void
}

; Unordered atomic store of %val to element %idx of the i128 array at %dst.
; In every configuration the check lines expect the address to be computed
; explicitly (index shifted left by 4, then added to the base) before the
; `stq` or the libcall; `stq` is always expected with displacement 0.
; On the lq/stq-capable prefixes the value is expected in the r8/r9 pair here,
; rather than the r6/r7 pair used by stq_unordered, since r6 holds %idx.
define dso_local void @stqx_unordered(i128 %val, ptr %dst, i64 %idx) {
; P8-LABEL: stqx_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    sldi r6, r6, 4
; P8-NEXT:    mr r9, r4
; P8-NEXT:    mr r8, r3
; P8-NEXT:    add r3, r5, r6
; P8-NEXT:    stq r8, 0(r3)
; P8-NEXT:    blr
;
; PWR7-LABEL: stqx_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r7, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    sldi r3, r6, 4
; PWR7-NEXT:    li r6, 0
; PWR7-NEXT:    add r3, r5, r3
; PWR7-NEXT:    mr r5, r7
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stqx_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    sldi r6, r6, 4
; LE-PWR8-NEXT:    mr r9, r3
; LE-PWR8-NEXT:    mr r8, r4
; LE-PWR8-NEXT:    add r3, r5, r6
; LE-PWR8-NEXT:    stq r8, 0(r3)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stqx_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    sldi r6, r6, 4
; AIX64-PWR8-NEXT:    mr r9, r4
; AIX64-PWR8-NEXT:    mr r8, r3
; AIX64-PWR8-NEXT:    add r3, r5, r6
; AIX64-PWR8-NEXT:    stq r8, 0(r3)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stqx_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    slwi r8, r10, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    add r6, r7, r8
; PPC-PWR8-NEXT:    mr r4, r6
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; Indexing in units of i128 scales %idx by 16 bytes, which is the
  ; shift-by-4 seen in the expected output.
  %0 = getelementptr i128, ptr %dst, i64 %idx
  store atomic i128 %val, ptr %0 unordered, align 16
  ret void
}

; Unordered atomic i128 store to a large constant offset off %dst.
; Element 131072 of an i128 array is byte offset 131072 * 16 = 2097152, i.e.
; 32 * 65536. The check lines expect that constant to be materialised with
; `lis r3, 32` + `add` (stq configurations) or folded into `addis ..., 32`
; (libcall configurations); `stq` is still expected with displacement 0.
define dso_local void @stq_big_offset_unordered(i128 %val, ptr %dst) {
; P8-LABEL: stq_big_offset_unordered:
; P8:       # %bb.0: # %entry
; P8-NEXT:    mr r7, r4
; P8-NEXT:    mr r6, r3
; P8-NEXT:    lis r3, 32
; P8-NEXT:    add r3, r5, r3
; P8-NEXT:    stq r6, 0(r3)
; P8-NEXT:    blr
;
; PWR7-LABEL: stq_big_offset_unordered:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r6, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    addis r3, r5, 32
; PWR7-NEXT:    mr r5, r6
; PWR7-NEXT:    li r6, 0
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stq_big_offset_unordered:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    mr r7, r3
; LE-PWR8-NEXT:    mr r6, r4
; LE-PWR8-NEXT:    lis r3, 32
; LE-PWR8-NEXT:    add r3, r5, r3
; LE-PWR8-NEXT:    stq r6, 0(r3)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stq_big_offset_unordered:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    mr r7, r4
; AIX64-PWR8-NEXT:    mr r6, r3
; AIX64-PWR8-NEXT:    lis r3, 32
; AIX64-PWR8-NEXT:    add r3, r5, r3
; AIX64-PWR8-NEXT:    stq r6, 0(r3)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stq_big_offset_unordered:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    addis r6, r7, 32
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    mr r4, r6
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; 131072 elements * 16 bytes = 0x200000, the `32` in the high-half
  ; immediates above.
  %0 = getelementptr i128, ptr %dst, i64 131072
  store atomic i128 %val, ptr %0 unordered, align 16
  ret void
}

; Monotonic atomic store of the i128 %val to the 16-byte-aligned %dst.
; The expected instruction sequences are the same as for stq_unordered: an
; inline `stq` with no barrier on the P8, LE-PWR8 and AIX64-PWR8 prefixes, and
; libcalls (__atomic_store_16 / __atomic_store) that receive 0 in r6 on the
; PWR7 and PPC-PWR8 prefixes.
define dso_local void @stq_monotonic(i128 %val, ptr %dst) {
; P8-LABEL: stq_monotonic:
; P8:       # %bb.0: # %entry
; P8-NEXT:    mr r7, r4
; P8-NEXT:    mr r6, r3
; P8-NEXT:    stq r6, 0(r5)
; P8-NEXT:    blr
;
; PWR7-LABEL: stq_monotonic:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r6, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    mr r3, r5
; PWR7-NEXT:    mr r5, r6
; PWR7-NEXT:    li r6, 0
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stq_monotonic:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    mr r7, r3
; LE-PWR8-NEXT:    mr r6, r4
; LE-PWR8-NEXT:    stq r6, 0(r5)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stq_monotonic:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    mr r7, r4
; AIX64-PWR8-NEXT:    mr r6, r3
; AIX64-PWR8-NEXT:    stq r6, 0(r5)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stq_monotonic:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r6, 0
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    mr r4, r7
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; Only the ordering keyword differs from stq_unordered.
  store atomic i128 %val, ptr %dst monotonic, align 16
  ret void
}

; Release atomic store of the i128 %val to the 16-byte-aligned %dst.
; On the stq-capable prefixes (P8, LE-PWR8, AIX64-PWR8) the check lines expect
; an `lwsync` ahead of the register copies and the `stq`.
; The libcall prefixes expect the same calls as stq_unordered, but with 3
; (instead of 0) in r6; presumably that is the memory-order argument --
; confirm against the libatomic interface.
define dso_local void @stq_release(i128 %val, ptr %dst) {
; P8-LABEL: stq_release:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lwsync
; P8-NEXT:    mr r7, r4
; P8-NEXT:    mr r6, r3
; P8-NEXT:    stq r6, 0(r5)
; P8-NEXT:    blr
;
; PWR7-LABEL: stq_release:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r6, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    mr r3, r5
; PWR7-NEXT:    mr r5, r6
; PWR7-NEXT:    li r6, 3
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stq_release:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    lwsync
; LE-PWR8-NEXT:    mr r7, r3
; LE-PWR8-NEXT:    mr r6, r4
; LE-PWR8-NEXT:    stq r6, 0(r5)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stq_release:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    lwsync
; AIX64-PWR8-NEXT:    mr r7, r4
; AIX64-PWR8-NEXT:    mr r6, r3
; AIX64-PWR8-NEXT:    stq r6, 0(r5)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stq_release:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r6, 3
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    mr r4, r7
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; The release ordering is what adds the leading `lwsync` before stq.
  store atomic i128 %val, ptr %dst release, align 16
  ret void
}

; Sequentially consistent atomic store of the i128 %val to the
; 16-byte-aligned %dst.
; On the stq-capable prefixes (P8, LE-PWR8, AIX64-PWR8) the check lines expect
; a full `sync` (where stq_release expects `lwsync`) ahead of the register
; copies and the `stq`; nothing is expected after the store.
; The libcall prefixes expect 5 in r6 (0 in stq_unordered, 3 in stq_release);
; presumably the memory-order argument -- confirm against the libatomic
; interface.
define dso_local void @stq_seqcst(i128 %val, ptr %dst) {
; P8-LABEL: stq_seqcst:
; P8:       # %bb.0: # %entry
; P8-NEXT:    sync
; P8-NEXT:    mr r7, r4
; P8-NEXT:    mr r6, r3
; P8-NEXT:    stq r6, 0(r5)
; P8-NEXT:    blr
;
; PWR7-LABEL: stq_seqcst:
; PWR7:       # %bb.0: # %entry
; PWR7-NEXT:    mflr r0
; PWR7-NEXT:    stdu r1, -112(r1)
; PWR7-NEXT:    std r0, 128(r1)
; PWR7-NEXT:    .cfi_def_cfa_offset 112
; PWR7-NEXT:    .cfi_offset lr, 16
; PWR7-NEXT:    mr r6, r4
; PWR7-NEXT:    mr r4, r3
; PWR7-NEXT:    mr r3, r5
; PWR7-NEXT:    mr r5, r6
; PWR7-NEXT:    li r6, 5
; PWR7-NEXT:    bl __atomic_store_16
; PWR7-NEXT:    nop
; PWR7-NEXT:    addi r1, r1, 112
; PWR7-NEXT:    ld r0, 16(r1)
; PWR7-NEXT:    mtlr r0
; PWR7-NEXT:    blr
;
; LE-PWR8-LABEL: stq_seqcst:
; LE-PWR8:       # %bb.0: # %entry
; LE-PWR8-NEXT:    sync
; LE-PWR8-NEXT:    mr r7, r3
; LE-PWR8-NEXT:    mr r6, r4
; LE-PWR8-NEXT:    stq r6, 0(r5)
; LE-PWR8-NEXT:    blr
;
; AIX64-PWR8-LABEL: stq_seqcst:
; AIX64-PWR8:       # %bb.0: # %entry
; AIX64-PWR8-NEXT:    sync
; AIX64-PWR8-NEXT:    mr r7, r4
; AIX64-PWR8-NEXT:    mr r6, r3
; AIX64-PWR8-NEXT:    stq r6, 0(r5)
; AIX64-PWR8-NEXT:    blr
;
; PPC-PWR8-LABEL: stq_seqcst:
; PPC-PWR8:       # %bb.0: # %entry
; PPC-PWR8-NEXT:    mflr r0
; PPC-PWR8-NEXT:    stwu r1, -32(r1)
; PPC-PWR8-NEXT:    stw r0, 36(r1)
; PPC-PWR8-NEXT:    .cfi_def_cfa_offset 32
; PPC-PWR8-NEXT:    .cfi_offset lr, 4
; PPC-PWR8-NEXT:    stw r6, 28(r1)
; PPC-PWR8-NEXT:    stw r5, 24(r1)
; PPC-PWR8-NEXT:    addi r5, r1, 16
; PPC-PWR8-NEXT:    li r6, 5
; PPC-PWR8-NEXT:    stw r4, 20(r1)
; PPC-PWR8-NEXT:    stw r3, 16(r1)
; PPC-PWR8-NEXT:    li r3, 16
; PPC-PWR8-NEXT:    mr r4, r7
; PPC-PWR8-NEXT:    bl __atomic_store
; PPC-PWR8-NEXT:    lwz r0, 36(r1)
; PPC-PWR8-NEXT:    addi r1, r1, 32
; PPC-PWR8-NEXT:    mtlr r0
; PPC-PWR8-NEXT:    blr
entry:
  ; seq_cst upgrades the leading barrier from `lwsync` to `sync`.
  store atomic i128 %val, ptr %dst seq_cst, align 16
  ret void
}