; llvm/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-OUTLINE,CHECK-OUTLINE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O0
; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-OUTLINE,CHECK-OUTLINE-O0
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O1
; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0
; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+outline-atomics -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE-O0

; Test: 32-bit cmpxchg with acquire ordering on both success and failure.
; The function returns element 0 of the cmpxchg result pair (the value that
; was loaded from %p) and discards the i1 success flag.
; Expected lowering, as pinned by the assertions below:
;   - no LSE, default opt level: ldaxr/stxr retry loop, with clrex on the
;     compare-failed exit
;   - no LSE, -O0: ldaxr/stlxr retry loop
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas4_acq
;   - -mcpu=apple-a13: a single casa instruction
define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB0_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB0_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w9, w2, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB0_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB0_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas4_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB0_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB0_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB0_1
; CHECK-NOLSE-O0-NEXT:  LBB0_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    mov w1, w2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    casa w1, w2, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    casa w0, w2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}

; Test: 32-bit acquire/acquire cmpxchg whose replacement value is not an
; argument but is first read from %pnew with an ordinary (non-atomic) load.
; The function returns element 0 of the cmpxchg result pair.
; Expected lowering, as pinned by the assertions below: every configuration
; emits a plain ldr of the new value ahead of the atomic sequence, then
;   - no LSE, default opt level: ldaxr/stxr retry loop, clrex on the
;     compare-failed exit
;   - no LSE, -O0: ldaxr/stlxr retry loop
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas4_acq
;   - -mcpu=apple-a13: a single casa instruction
define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_from_load:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldr w9, [x2]
; CHECK-NOLSE-O1-NEXT:  LBB1_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB1_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB1_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB1_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_from_load:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    ldr w8, [x2]
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    mov w1, w8
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas4_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_from_load:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:    ldr w10, [x2]
; CHECK-NOLSE-O0-NEXT:  LBB1_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB1_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, w10, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB1_1
; CHECK-NOLSE-O0-NEXT:  LBB1_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_from_load:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    mov x8, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w1, [x8]
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_from_load:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldr w8, [x2]
; CHECK-LSE-O1-NEXT:    casa w1, w8, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap_from_load:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x9, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    ldr w8, [x2]
; CHECK-LSE-O0-NEXT:    casa w0, w8, [x9]
; CHECK-LSE-O0-NEXT:    ret
  %new = load i32, ptr %pnew
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}

; Test: 32-bit cmpxchg with acq_rel success ordering and monotonic failure
; ordering. The function returns element 0 of the cmpxchg result pair (the
; value loaded from %p).
; Expected lowering, as pinned by the assertions below:
;   - no LSE (default opt level and -O0): ldaxr/stlxr retry loop; the
;     default-opt form also has a clrex on the compare-failed exit
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas4_acq_rel
;   - -mcpu=apple-a13: a single casal instruction
define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_rel:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB2_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB2_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stlxr w9, w2, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB2_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB2_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_rel:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas4_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_rel:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB2_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB2_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB2_1
; CHECK-NOLSE-O0-NEXT:  LBB2_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_rel:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    mov w1, w2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_rel:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    casal w1, w2, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap_rel:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    casal w0, w2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}

; Test: 64-bit cmpxchg with monotonic ordering on both success and failure.
; The function returns element 0 of the cmpxchg result pair (the value loaded
; from %p).
; Expected lowering, as pinned by the assertions below:
;   - no LSE, default opt level: ldxr/stxr retry loop (no acquire/release
;     forms), clrex on the compare-failed exit
;   - no LSE, -O0: ldaxr/stlxr retry loop
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas8_relax
;   - -mcpu=apple-a13: a single plain cas instruction
define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB3_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB3_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB3_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB3_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB3_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB3_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB3_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB3_1
; CHECK-NOLSE-O0-NEXT:  LBB3_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    mov x1, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    cas x1, x2, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap_64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic monotonic
  %val = extractvalue { i64, i1 } %pair, 0
  ret i64 %val
}

; Test: 64-bit cmpxchg whose failure ordering (seq_cst) is stronger than its
; success ordering (monotonic). The function returns element 0 of the cmpxchg
; result pair (the value loaded from %p).
; Expected lowering, as pinned by the assertions below:
;   - no LSE (default opt level and -O0): ldaxr/stlxr retry loop; the
;     default-opt form also has a clrex on the compare-failed exit
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas8_acq_rel
;   - -mcpu=apple-a13: a single casal instruction
define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB4_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB4_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stlxr w9, x2, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB4_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB4_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB4_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB4_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB4_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB4_1
; CHECK-NOLSE-O0-NEXT:  LBB4_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    mov x1, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    casal x1, x2, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap_64_monotonic_seqcst:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    casal x0, x2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %pair = cmpxchg ptr %p, i64 %cmp, i64 %new monotonic seq_cst
  %val = extractvalue { i64, i1 } %pair, 0
  ret i64 %val
}

; Test: 64-bit cmpxchg with release success ordering and acquire failure
; ordering. The function returns element 0 of the cmpxchg result pair (the
; value loaded from %p).
; Expected lowering, as pinned by the assertions below:
;   - no LSE (default opt level and -O0): ldaxr/stlxr retry loop; the
;     default-opt form also has a clrex on the compare-failed exit
;   - +outline-atomics (no -mcpu): call to ___aarch64_cas8_acq_rel
;   - -mcpu=apple-a13: a single casal instruction
define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new) #0 {
; CHECK-NOLSE-O1-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB5_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB5_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stlxr w9, x2, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB5_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB5_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB5_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB5_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB5_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB5_1
; CHECK-NOLSE-O0-NEXT:  LBB5_3:
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    mov x1, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    casal x1, x2, [x0]
; CHECK-LSE-O1-NEXT:    mov x0, x1
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: val_compare_and_swap_64_release_acquire:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    casal x0, x2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %pair = cmpxchg ptr %p, i64 %cmp, i64 %new release acquire
  %val = extractvalue { i64, i1 } %pair, 0
  ret i64 %val
}

; Test: 32-bit atomicrmw nand of the constant 7 with release ordering. The
; function returns the atomicrmw result (the value previously stored at %p).
; Expected lowering, as pinned by the assertions below:
;   - default opt level, with or without +outline-atomics (no -mcpu): an
;     inline ldxr / and #0x7 / mvn / stlxr retry loop, with no library call
;   - no LSE, -O0: a plain ldr of the initial value, then an outer retry
;     loop wrapping an inner ldaxr/stlxr compare-and-swap loop
;   - +outline-atomics, -O0: retry loop around a call to ___aarch64_cas4_rel
;   - -mcpu=apple-a13: retry loop around a casl instruction
define i32 @fetch_and_nand(ptr %p) #0 {
; CHECK-NOLSE-O1-LABEL: fetch_and_nand:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w9, w8, #0x7
; CHECK-NOLSE-O1-NEXT:    mvn w9, w9
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB6_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: fetch_and_nand:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxr w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and w9, w8, #0x7
; CHECK-OUTLINE-O1-NEXT:    mvn w9, w9
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB6_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: fetch_and_nand:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB6_1
; CHECK-NOLSE-O0-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB6_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w9, w8, #0x7
; CHECK-NOLSE-O0-NEXT:    mvn w12, w9
; CHECK-NOLSE-O0-NEXT:  LBB6_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB6_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB6_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB6_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB6_2
; CHECK-NOLSE-O0-NEXT:  LBB6_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB6_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB6_1
; CHECK-NOLSE-O0-NEXT:    b LBB6_5
; CHECK-NOLSE-O0-NEXT:  LBB6_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: fetch_and_nand:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB6_1
; CHECK-OUTLINE-O0-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0x7
; CHECK-OUTLINE-O0-NEXT:    mvn w1, w8
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB6_1
; CHECK-OUTLINE-O0-NEXT:    b LBB6_2
; CHECK-OUTLINE-O0-NEXT:  LBB6_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: fetch_and_nand:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x0
; CHECK-LSE-O1-NEXT:    ldr w0, [x0]
; CHECK-LSE-O1-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-LSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O1-NEXT:    mov x9, x0
; CHECK-LSE-O1-NEXT:    and w10, w0, #0x7
; CHECK-LSE-O1-NEXT:    mvn w10, w10
; CHECK-LSE-O1-NEXT:    casl w0, w10, [x8]
; CHECK-LSE-O1-NEXT:    cmp w0, w9
; CHECK-LSE-O1-NEXT:    b.ne LBB6_1
; CHECK-LSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: fetch_and_nand:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    sub sp, sp, #32
; CHECK-LSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-LSE-O0-NEXT:    ldr w8, [x0]
; CHECK-LSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-LSE-O0-NEXT:    b LBB6_1
; CHECK-LSE-O0-NEXT:  LBB6_1: ; %atomicrmw.start
; CHECK-LSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-LSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-LSE-O0-NEXT:    and w9, w8, #0x7
; CHECK-LSE-O0-NEXT:    mvn w10, w9
; CHECK-LSE-O0-NEXT:    mov x9, x8
; CHECK-LSE-O0-NEXT:    casl w9, w10, [x11]
; CHECK-LSE-O0-NEXT:    subs w8, w9, w8
; CHECK-LSE-O0-NEXT:    cset w8, eq
; CHECK-LSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-LSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-LSE-O0-NEXT:    tbz w8, #0, LBB6_1
; CHECK-LSE-O0-NEXT:    b LBB6_2
; CHECK-LSE-O0-NEXT:  LBB6_2: ; %atomicrmw.end
; CHECK-LSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-LSE-O0-NEXT:    add sp, sp, #32
; CHECK-LSE-O0-NEXT:    ret
  %val = atomicrmw nand ptr %p, i32 7 release
  ret i32 %val
}

; Test: 64-bit atomicrmw nand of the constant 7 with acq_rel ordering. The
; function returns the atomicrmw result (the value previously stored at %p).
; Expected lowering, as pinned by the assertions below:
;   - default opt level, with or without +outline-atomics (no -mcpu): an
;     inline ldaxr / and #0x7 / mvn / stlxr retry loop, with no library call
;   - no LSE, -O0: a plain ldr of the initial value, then an outer retry
;     loop wrapping an inner ldaxr/stlxr compare-and-swap loop
;   - +outline-atomics, -O0: retry loop around a call to
;     ___aarch64_cas8_acq_rel
;   - -mcpu=apple-a13: retry loop around a casal instruction
define i64 @fetch_and_nand_64(ptr %p) #0 {
; CHECK-NOLSE-O1-LABEL: fetch_and_nand_64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    and x9, x8, #0x7
; CHECK-NOLSE-O1-NEXT:    mvn x9, x9
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB7_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: fetch_and_nand_64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and x9, x8, #0x7
; CHECK-OUTLINE-O1-NEXT:    mvn x9, x9
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB7_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov x0, x8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: fetch_and_nand_64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB7_1
; CHECK-NOLSE-O0-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB7_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and x9, x8, #0x7
; CHECK-NOLSE-O0-NEXT:    mvn x12, x9
; CHECK-NOLSE-O0-NEXT:  LBB7_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB7_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB7_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB7_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB7_2
; CHECK-NOLSE-O0-NEXT:  LBB7_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB7_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB7_1
; CHECK-NOLSE-O0-NEXT:    b LBB7_5
; CHECK-NOLSE-O0-NEXT:  LBB7_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: fetch_and_nand_64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB7_1
; CHECK-OUTLINE-O0-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #24] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and x8, x0, #0x7
; CHECK-OUTLINE-O0-NEXT:    mvn x1, x8
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB7_1
; CHECK-OUTLINE-O0-NEXT:    b LBB7_2
; CHECK-OUTLINE-O0-NEXT:  LBB7_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: fetch_and_nand_64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x0
; CHECK-LSE-O1-NEXT:    ldr x0, [x0]
; CHECK-LSE-O1-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-LSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O1-NEXT:    mov x9, x0
; CHECK-LSE-O1-NEXT:    and x10, x0, #0x7
; CHECK-LSE-O1-NEXT:    mvn x10, x10
; CHECK-LSE-O1-NEXT:    casal x0, x10, [x8]
; CHECK-LSE-O1-NEXT:    cmp x0, x9
; CHECK-LSE-O1-NEXT:    b.ne LBB7_1
; CHECK-LSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: fetch_and_nand_64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    sub sp, sp, #32
; CHECK-LSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-LSE-O0-NEXT:    ldr x8, [x0]
; CHECK-LSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-LSE-O0-NEXT:    b LBB7_1
; CHECK-LSE-O0-NEXT:  LBB7_1: ; %atomicrmw.start
; CHECK-LSE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-LSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-LSE-O0-NEXT:    and x9, x8, #0x7
; CHECK-LSE-O0-NEXT:    mvn x10, x9
; CHECK-LSE-O0-NEXT:    mov x9, x8
; CHECK-LSE-O0-NEXT:    casal x9, x10, [x11]
; CHECK-LSE-O0-NEXT:    subs x8, x9, x8
; CHECK-LSE-O0-NEXT:    cset w8, eq
; CHECK-LSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
; CHECK-LSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-LSE-O0-NEXT:    tbz w8, #0, LBB7_1
; CHECK-LSE-O0-NEXT:    b LBB7_2
; CHECK-LSE-O0-NEXT:  LBB7_2: ; %atomicrmw.end
; CHECK-LSE-O0-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
; CHECK-LSE-O0-NEXT:    add sp, sp, #32
; CHECK-LSE-O0-NEXT:    ret
  %val = atomicrmw nand ptr %p, i64 7 acq_rel
  ret i64 %val
}

; Codegen test for a seq_cst `atomicrmw or` of the constant 5 on an i32; the
; function returns the atomicrmw result.
; Expected forms, as pinned by the assertions below:
;   - NOLSE, O1:   an ldaxr / orr / stlxr retry loop.
;   - OUTLINE:     a call to ___aarch64_ldset4_acq_rel (same at O1 and O0).
;   - NOLSE, O0:   an outer retry loop around an inner ldaxr / cmp / stlxr
;                  loop, with the pointer and loaded value spilled to the stack.
;   - LSE, O1/O0:  a single ldsetal.
define i32 @fetch_and_or(ptr %p) #0 {
; CHECK-NOLSE-O1-LABEL: fetch_and_or:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov w9, #5 ; =0x5
; CHECK-NOLSE-O1-NEXT:  LBB8_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr w10, w8, w9
; CHECK-NOLSE-O1-NEXT:    stlxr w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB8_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: fetch_and_or:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-NEXT:    mov x1, x0
; CHECK-OUTLINE-NEXT:    mov w0, #5 ; =0x5
; CHECK-OUTLINE-NEXT:    bl ___aarch64_ldset4_acq_rel
; CHECK-OUTLINE-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: fetch_and_or:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB8_1
; CHECK-NOLSE-O0-NEXT:  LBB8_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB8_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    mov w9, #5 ; =0x5
; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB8_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB8_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB8_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB8_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB8_2
; CHECK-NOLSE-O0-NEXT:  LBB8_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB8_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB8_1
; CHECK-NOLSE-O0-NEXT:    b LBB8_5
; CHECK-NOLSE-O0-NEXT:  LBB8_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: fetch_and_or:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov w8, #5 ; =0x5
; CHECK-LSE-O1-NEXT:    ldsetal w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: fetch_and_or:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov w8, #5 ; =0x5
; CHECK-LSE-O0-NEXT:    ldsetal w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %val = atomicrmw or ptr %p, i32 5 seq_cst
  ret i32 %val
}

; Codegen test for a monotonic `atomicrmw or` of the constant 7 on an i64; the
; function returns the atomicrmw result.
; Expected forms, as pinned by the assertions below:
;   - NOLSE, O1:   an ldxr / orr / stxr retry loop (no acquire/release forms).
;   - OUTLINE:     a call to ___aarch64_ldset8_relax; the O0 variant first
;                  materialises the constant in w8 and copies it to w0.
;   - NOLSE, O0:   an outer retry loop around an inner ldaxr / cmp / stlxr
;                  loop, with the pointer and loaded value spilled to the stack.
;   - LSE, O1/O0:  a single ldset (O0 carries an extra register "kill" note).
define i64 @fetch_and_or_64(ptr %p) #0 {
; CHECK-NOLSE-O1-LABEL: fetch_and_or_64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB9_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr x9, x8, #0x7
; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB9_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: fetch_and_or_64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    mov x1, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, #7 ; =0x7
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldset8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: fetch_and_or_64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB9_1
; CHECK-NOLSE-O0-NEXT:  LBB9_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB9_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    orr x12, x8, #0x7
; CHECK-NOLSE-O0-NEXT:  LBB9_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB9_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB9_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB9_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB9_2
; CHECK-NOLSE-O0-NEXT:  LBB9_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB9_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB9_1
; CHECK-NOLSE-O0-NEXT:    b LBB9_5
; CHECK-NOLSE-O0-NEXT:  LBB9_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: fetch_and_or_64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x1, x0
; CHECK-OUTLINE-O0-NEXT:    mov w8, #7 ; =0x7
; CHECK-OUTLINE-O0-NEXT:    mov w0, w8
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldset8_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: fetch_and_or_64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov w8, #7 ; =0x7
; CHECK-LSE-O1-NEXT:    ldset x8, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: fetch_and_or_64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov w8, #7 ; =0x7
; CHECK-LSE-O0-NEXT:    ; kill: def $x8 killed $w8
; CHECK-LSE-O0-NEXT:    ldset x8, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %val = atomicrmw or ptr %p, i64 7 monotonic
  ret i64 %val
}

; Codegen test for `fence acquire`: every checked configuration expects a
; single `dmb ishld` followed by `ret`.
define void @acquire_fence() #0 {
; CHECK-NOLSE-LABEL: acquire_fence:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    dmb ishld
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: acquire_fence:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    dmb ishld
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: acquire_fence:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    dmb ishld
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: acquire_fence:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    dmb ishld
; CHECK-LSE-O0-NEXT:    ret
   fence acquire
   ret void
}

; Codegen test for `fence release`: every checked configuration expects a
; single `dmb ish` followed by `ret`.
define void @release_fence() #0 {
; CHECK-NOLSE-LABEL: release_fence:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    dmb ish
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: release_fence:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    dmb ish
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: release_fence:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    dmb ish
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: release_fence:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    dmb ish
; CHECK-LSE-O0-NEXT:    ret
   fence release
   ret void
}

; Codegen test for `fence seq_cst`: every checked configuration expects a
; single `dmb ish` followed by `ret` (the same barrier as the release fence
; test in this file).
define void @seq_cst_fence() #0 {
; CHECK-NOLSE-LABEL: seq_cst_fence:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    dmb ish
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: seq_cst_fence:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    dmb ish
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: seq_cst_fence:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    dmb ish
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: seq_cst_fence:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    dmb ish
; CHECK-LSE-O0-NEXT:    ret
   fence seq_cst
   ret void
}

; Codegen test for a seq_cst atomic i32 load (align 4): every checked
; configuration expects a single `ldar w0, [x0]` followed by `ret`.
define i32 @atomic_load(ptr %p) #0 {
; CHECK-NOLSE-LABEL: atomic_load:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar w0, [x0]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: atomic_load:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar w0, [x0]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_load:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_load:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
   %r = load atomic i32, ptr %p seq_cst, align 4
   ret i32 %r
}

; Addressing-mode test for monotonic/unordered atomic i8 loads. Four loads are
; issued from %p and their values summed into the return value:
;   - byte offset 4095:     expected as `ldrb [x0, #4095]`.
;   - register offset:      `ldrb [x0, w1, sxtw]` at O1; O0 forms the address
;                           with a separate add first.
;   - byte offset -256:     `ldurb [x0, #-256]` at O1; O0 uses `subs` + `ldrb`.
;   - byte offset 0x123000: address formed with `add ..., #291, lsl #12`, then
;                           a plain `ldrb`.
; All expected sequences use ordinary byte loads with no barrier instructions.
define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldrb w8, [x0, #4095]
; CHECK-NOLSE-O1-NEXT:    ldrb w9, [x0, w1, sxtw]
; CHECK-NOLSE-O1-NEXT:    ldurb w10, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    add w8, w9, w8, uxtb
; CHECK-NOLSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    ldrb w9, [x9]
; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10, uxtb
; CHECK-NOLSE-O1-NEXT:    add w0, w8, w9, uxtb
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldrb w8, [x0, #4095]
; CHECK-OUTLINE-O1-NEXT:    ldrb w9, [x0, w1, sxtw]
; CHECK-OUTLINE-O1-NEXT:    ldurb w10, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    add w8, w9, w8, uxtb
; CHECK-OUTLINE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    ldrb w9, [x9]
; CHECK-OUTLINE-O1-NEXT:    add w8, w8, w10, uxtb
; CHECK-OUTLINE-O1-NEXT:    add w0, w8, w9, uxtb
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x0, #4095]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x8]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT:    subs x9, x0, #256
; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x9]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    ldrb w9, [x9]
; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldrb w9, [x0, #4095]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, w1, sxtw
; CHECK-OUTLINE-O0-NEXT:    ldrb w8, [x8]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, #256
; CHECK-OUTLINE-O0-NEXT:    ldrb w9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    ldrb w9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldrb w8, [x0, #4095]
; CHECK-LSE-O1-NEXT:    ldrb w9, [x0, w1, sxtw]
; CHECK-LSE-O1-NEXT:    add w8, w9, w8, uxtb
; CHECK-LSE-O1-NEXT:    ldurb w9, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add w8, w8, w9, uxtb
; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    ldrb w9, [x9]
; CHECK-LSE-O1-NEXT:    add w0, w8, w9, uxtb
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldrb w9, [x0, #4095]
; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw
; CHECK-LSE-O0-NEXT:    ldrb w8, [x8]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-LSE-O0-NEXT:    subs x9, x0, #256
; CHECK-LSE-O0-NEXT:    ldrb w9, [x9]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxtb
; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    ldrb w9, [x9]
; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i8, ptr %p, i32 4095
  %val_unsigned = load atomic i8, ptr %ptr_unsigned monotonic, align 1

  %ptr_regoff = getelementptr i8, ptr %p, i32 %off32
  %val_regoff = load atomic i8, ptr %ptr_regoff unordered, align 1
  %tot1 = add i8 %val_unsigned, %val_regoff

  %ptr_unscaled = getelementptr i8, ptr %p, i32 -256
  %val_unscaled = load atomic i8, ptr %ptr_unscaled monotonic, align 1
  %tot2 = add i8 %tot1, %val_unscaled

  %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
  %val_random = load atomic i8, ptr %ptr_random unordered, align 1
  %tot3 = add i8 %tot2, %val_random

  ret i8 %tot3
}

; Addressing-mode test for monotonic/unordered atomic i16 loads. Four loads
; are issued from %p (element indices are scaled by 2 bytes) and their values
; summed into the return value:
;   - index 4095 (byte 8190):      expected as `ldrh [x0, #8190]`.
;   - register index:              `ldrh [x0, w1, sxtw #1]` at O1; O0 forms the
;                                  address with a separate add first.
;   - index -128 (byte -256):      `ldurh [x0, #-256]` at O1; O0 uses `subs` +
;                                  `ldrh`.
;   - index 595968 (byte 0x123000): address formed with
;                                  `add ..., #291, lsl #12`, then `ldrh`.
; All expected sequences use ordinary halfword loads with no barriers.
define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldrh w8, [x0, #8190]
; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x0, w1, sxtw #1]
; CHECK-NOLSE-O1-NEXT:    ldurh w10, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    add w8, w9, w8, uxth
; CHECK-NOLSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x9]
; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10, uxth
; CHECK-NOLSE-O1-NEXT:    add w0, w8, w9, uxth
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldrh w8, [x0, #8190]
; CHECK-OUTLINE-O1-NEXT:    ldrh w9, [x0, w1, sxtw #1]
; CHECK-OUTLINE-O1-NEXT:    ldurh w10, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    add w8, w9, w8, uxth
; CHECK-OUTLINE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    ldrh w9, [x9]
; CHECK-OUTLINE-O1-NEXT:    add w8, w8, w10, uxth
; CHECK-OUTLINE-O1-NEXT:    add w0, w8, w9, uxth
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x0, #8190]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, w1, sxtw #1
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x8]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    subs x9, x0, #256
; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x9]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    ldrh w9, [x9]
; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldrh w9, [x0, #8190]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, w1, sxtw #1
; CHECK-OUTLINE-O0-NEXT:    ldrh w8, [x8]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, #256
; CHECK-OUTLINE-O0-NEXT:    ldrh w9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-OUTLINE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    ldrh w9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add w0, w8, w9, uxth
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldrh w8, [x0, #8190]
; CHECK-LSE-O1-NEXT:    ldrh w9, [x0, w1, sxtw #1]
; CHECK-LSE-O1-NEXT:    add w8, w9, w8, uxth
; CHECK-LSE-O1-NEXT:    ldurh w9, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add w8, w8, w9, uxth
; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    ldrh w9, [x9]
; CHECK-LSE-O1-NEXT:    add w0, w8, w9, uxth
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldrh w9, [x0, #8190]
; CHECK-LSE-O0-NEXT:    add x8, x0, w1, sxtw #1
; CHECK-LSE-O0-NEXT:    ldrh w8, [x8]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-LSE-O0-NEXT:    subs x9, x0, #256
; CHECK-LSE-O0-NEXT:    ldrh w9, [x9]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9, uxth
; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    ldrh w9, [x9]
; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxth
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i16, ptr %p, i32 4095
  %val_unsigned = load atomic i16, ptr %ptr_unsigned monotonic, align 2

  %ptr_regoff = getelementptr i16, ptr %p, i32 %off32
  %val_regoff = load atomic i16, ptr %ptr_regoff unordered, align 2
  %tot1 = add i16 %val_unsigned, %val_regoff

  %ptr_unscaled = getelementptr i16, ptr %p, i32 -128
  %val_unscaled = load atomic i16, ptr %ptr_unscaled monotonic, align 2
  %tot2 = add i16 %tot1, %val_unscaled

  %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
  %val_random = load atomic i16, ptr %ptr_random unordered, align 2
  %tot3 = add i16 %tot2, %val_random

  ret i16 %tot3
}

; Addressing-mode test for monotonic/unordered atomic i32 loads. Four loads
; are issued from %p (element indices are scaled by 4 bytes) and their values
; summed into the return value:
;   - index 4095 (byte 16380):      expected as `ldr w, [x0, #16380]`.
;   - register index:               `ldr w, [x0, w1, sxtw #2]`.
;   - index -64 (byte -256):        `ldur w, [x0, #-256]`.
;   - index 297984 (byte 0x123000): address formed with
;                                   `add ..., #291, lsl #12`, then `ldr`.
; The same four addressing forms are expected at O0 and O1; only instruction
; order and register assignment differ between the configurations.
define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldr w8, [x0, #16380]
; CHECK-NOLSE-O1-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    ldur w10, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    add w8, w8, w9
; CHECK-NOLSE-O1-NEXT:    ldr w9, [x11]
; CHECK-NOLSE-O1-NEXT:    add w8, w8, w10
; CHECK-NOLSE-O1-NEXT:    add w0, w8, w9
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldr w8, [x0, #16380]
; CHECK-OUTLINE-O1-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-OUTLINE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    ldur w10, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    add w8, w8, w9
; CHECK-OUTLINE-O1-NEXT:    ldr w9, [x11]
; CHECK-OUTLINE-O1-NEXT:    add w8, w8, w10
; CHECK-OUTLINE-O1-NEXT:    add w0, w8, w9
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0, #16380]
; CHECK-NOLSE-O0-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9
; CHECK-NOLSE-O0-NEXT:    ldur w9, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add w8, w8, w9
; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    ldr w9, [x9]
; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [x0, #16380]
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9
; CHECK-OUTLINE-O0-NEXT:    ldur w9, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add w8, w8, w9
; CHECK-OUTLINE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldr w8, [x0, #16380]
; CHECK-LSE-O1-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-LSE-O1-NEXT:    ldur w10, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add w8, w8, w10
; CHECK-LSE-O1-NEXT:    add w8, w8, w9
; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    ldr w9, [x9]
; CHECK-LSE-O1-NEXT:    add w0, w8, w9
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldr w8, [x0, #16380]
; CHECK-LSE-O0-NEXT:    ldr w9, [x0, w1, sxtw #2]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9
; CHECK-LSE-O0-NEXT:    ldur w9, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add w8, w8, w9
; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    ldr w9, [x9]
; CHECK-LSE-O0-NEXT:    add w0, w8, w9
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i32, ptr %p, i32 4095
  %val_unsigned = load atomic i32, ptr %ptr_unsigned monotonic, align 4

  %ptr_regoff = getelementptr i32, ptr %p, i32 %off32
  %val_regoff = load atomic i32, ptr %ptr_regoff unordered, align 4
  %tot1 = add i32 %val_unsigned, %val_regoff

  %ptr_unscaled = getelementptr i32, ptr %p, i32 -64
  %val_unscaled = load atomic i32, ptr %ptr_unscaled monotonic, align 4
  %tot2 = add i32 %tot1, %val_unscaled

  %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
  %val_random = load atomic i32, ptr %ptr_random unordered, align 4
  %tot3 = add i32 %tot2, %val_random

  ret i32 %tot3
}

; Addressing-mode test for monotonic/unordered atomic i64 loads. Four loads
; are issued from %p (element indices are scaled by 8 bytes) and their values
; summed into the return value:
;   - index 4095 (byte 32760):      expected as `ldr x, [x0, #32760]`.
;   - register index:               `ldr x, [x0, w1, sxtw #3]`.
;   - index -32 (byte -256):        `ldur x, [x0, #-256]`.
;   - index 148992 (byte 0x123000): address formed with
;                                   `add ..., #291, lsl #12`, then `ldr`.
; The same four addressing forms are expected at O0 and O1; only instruction
; order and register assignment differ between the configurations.
define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldr x8, [x0, #32760]
; CHECK-NOLSE-O1-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-NOLSE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    ldur x10, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    add x8, x8, x9
; CHECK-NOLSE-O1-NEXT:    ldr x9, [x11]
; CHECK-NOLSE-O1-NEXT:    add x8, x8, x10
; CHECK-NOLSE-O1-NEXT:    add x0, x8, x9
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldr x8, [x0, #32760]
; CHECK-OUTLINE-O1-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-OUTLINE-O1-NEXT:    add x11, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    ldur x10, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    add x8, x8, x9
; CHECK-OUTLINE-O1-NEXT:    ldr x9, [x11]
; CHECK-OUTLINE-O1-NEXT:    add x8, x8, x10
; CHECK-OUTLINE-O1-NEXT:    add x0, x8, x9
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0, #32760]
; CHECK-NOLSE-O0-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-NOLSE-O0-NEXT:    add x8, x8, x9
; CHECK-NOLSE-O0-NEXT:    ldur x9, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add x8, x8, x9
; CHECK-NOLSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    ldr x9, [x9]
; CHECK-NOLSE-O0-NEXT:    add x0, x8, x9
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_load_relaxed_64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [x0, #32760]
; CHECK-OUTLINE-O0-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-OUTLINE-O0-NEXT:    add x8, x8, x9
; CHECK-OUTLINE-O0-NEXT:    ldur x9, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add x8, x8, x9
; CHECK-OUTLINE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    ldr x9, [x9]
; CHECK-OUTLINE-O0-NEXT:    add x0, x8, x9
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_load_relaxed_64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldr x8, [x0, #32760]
; CHECK-LSE-O1-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-LSE-O1-NEXT:    ldur x10, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add x8, x8, x10
; CHECK-LSE-O1-NEXT:    add x8, x8, x9
; CHECK-LSE-O1-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    ldr x9, [x9]
; CHECK-LSE-O1-NEXT:    add x0, x8, x9
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_load_relaxed_64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldr x8, [x0, #32760]
; CHECK-LSE-O0-NEXT:    ldr x9, [x0, w1, sxtw #3]
; CHECK-LSE-O0-NEXT:    add x8, x8, x9
; CHECK-LSE-O0-NEXT:    ldur x9, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add x8, x8, x9
; CHECK-LSE-O0-NEXT:    add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    ldr x9, [x9]
; CHECK-LSE-O0-NEXT:    add x0, x8, x9
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i64, ptr %p, i32 4095
  %val_unsigned = load atomic i64, ptr %ptr_unsigned monotonic, align 8

  %ptr_regoff = getelementptr i64, ptr %p, i32 %off32
  %val_regoff = load atomic i64, ptr %ptr_regoff unordered, align 8
  %tot1 = add i64 %val_unsigned, %val_regoff

  %ptr_unscaled = getelementptr i64, ptr %p, i32 -32
  %val_unscaled = load atomic i64, ptr %ptr_unscaled monotonic, align 8
  %tot2 = add i64 %tot1, %val_unscaled

  %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
  %val_random = load atomic i64, ptr %ptr_random unordered, align 8
  %tot3 = add i64 %tot2, %val_random

  ret i64 %tot3
}


; Codegen test for a seq_cst atomic i32 store of the constant 4 (align 4):
; every checked configuration expects `mov w8, #4` followed by `stlr w8, [x0]`.
; NOTE(review): the function name reads like a typo for "atomic_store"; it is
; kept as-is because the label assertions below match on this exact spelling.
define void @atomc_store(ptr %p) #0 {
; CHECK-NOLSE-LABEL: atomc_store:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    mov w8, #4 ; =0x4
; CHECK-NOLSE-NEXT:    stlr w8, [x0]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: atomc_store:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    mov w8, #4 ; =0x4
; CHECK-OUTLINE-NEXT:    stlr w8, [x0]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomc_store:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov w8, #4 ; =0x4
; CHECK-LSE-O1-NEXT:    stlr w8, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomc_store:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov w8, #4 ; =0x4
; CHECK-LSE-O0-NEXT:    stlr w8, [x0]
; CHECK-LSE-O0-NEXT:    ret
   store atomic i32 4, ptr %p seq_cst, align 4
   ret void
}

; Addressing-mode test for monotonic/unordered atomic i8 stores. The same
; %val is stored to four locations derived from %p:
;   - byte offset 4095:     expected as `strb w2, [x0, #4095]`.
;   - register offset:      `strb w2, [x0, w1, sxtw]`.
;   - byte offset -256:     `sturb w2, [x0, #-256]`.
;   - byte offset 0x123000: address formed with `add ..., #291, lsl #12`, then
;                           a plain `strb`.
; All expected sequences use ordinary byte stores with no barriers; the
; configurations differ only in where the address-forming add is placed.
define void @atomic_store_relaxed_8(ptr %p, i32 %off32, i8 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, #4095]
; CHECK-NOLSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-NOLSE-O1-NEXT:    sturb w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    strb w2, [x8]
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    strb w2, [x0, #4095]
; CHECK-OUTLINE-O1-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-OUTLINE-O1-NEXT:    sturb w2, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    strb w2, [x8]
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, #4095]
; CHECK-NOLSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-NOLSE-O0-NEXT:    sturb w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    strb w2, [x8]
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    strb w2, [x0, #4095]
; CHECK-OUTLINE-O0-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-OUTLINE-O0-NEXT:    sturb w2, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    strb w2, [x8]
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    strb w2, [x0, #4095]
; CHECK-LSE-O1-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-LSE-O1-NEXT:    sturb w2, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    strb w2, [x8]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    strb w2, [x0, #4095]
; CHECK-LSE-O0-NEXT:    strb w2, [x0, w1, sxtw]
; CHECK-LSE-O0-NEXT:    sturb w2, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    strb w2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i8, ptr %p, i32 4095
  store atomic i8 %val, ptr %ptr_unsigned monotonic, align 1

  %ptr_regoff = getelementptr i8, ptr %p, i32 %off32
  store atomic i8 %val, ptr %ptr_regoff unordered, align 1

  %ptr_unscaled = getelementptr i8, ptr %p, i32 -256
  store atomic i8 %val, ptr %ptr_unscaled monotonic, align 1

  %ptr_random = getelementptr i8, ptr %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
  store atomic i8 %val, ptr %ptr_random unordered, align 1

  ret void
}

; Addressing-mode test for monotonic/unordered atomic i16 stores. The same
; %val is stored to four locations derived from %p (element indices are
; scaled by 2 bytes):
;   - index 4095 (byte 8190):       expected as `strh w2, [x0, #8190]`.
;   - register index:               `strh w2, [x0, w1, sxtw #1]`.
;   - index -128 (byte -256):       `sturh w2, [x0, #-256]`.
;   - index 595968 (byte 0x123000): address formed with
;                                   `add ..., #291, lsl #12`, then `strh`.
; All expected sequences use ordinary halfword stores with no barriers; the
; configurations differ only in where the address-forming add is placed.
define void @atomic_store_relaxed_16(ptr %p, i32 %off32, i16 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, #8190]
; CHECK-NOLSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-NOLSE-O1-NEXT:    sturh w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    strh w2, [x8]
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    strh w2, [x0, #8190]
; CHECK-OUTLINE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-OUTLINE-O1-NEXT:    sturh w2, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    strh w2, [x8]
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, #8190]
; CHECK-NOLSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-NOLSE-O0-NEXT:    sturh w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    strh w2, [x8]
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    strh w2, [x0, #8190]
; CHECK-OUTLINE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-OUTLINE-O0-NEXT:    sturh w2, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    strh w2, [x8]
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    strh w2, [x0, #8190]
; CHECK-LSE-O1-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-LSE-O1-NEXT:    sturh w2, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    strh w2, [x8]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    strh w2, [x0, #8190]
; CHECK-LSE-O0-NEXT:    strh w2, [x0, w1, sxtw #1]
; CHECK-LSE-O0-NEXT:    sturh w2, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    strh w2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i16, ptr %p, i32 4095
  store atomic i16 %val, ptr %ptr_unsigned monotonic, align 2

  %ptr_regoff = getelementptr i16, ptr %p, i32 %off32
  store atomic i16 %val, ptr %ptr_regoff unordered, align 2

  %ptr_unscaled = getelementptr i16, ptr %p, i32 -128
  store atomic i16 %val, ptr %ptr_unscaled monotonic, align 2

  %ptr_random = getelementptr i16, ptr %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
  store atomic i16 %val, ptr %ptr_random unordered, align 2

  ret void
}

; Addressing-mode test for monotonic/unordered atomic i32 stores. The same
; %val is stored to four locations derived from %p (element indices are
; scaled by 4 bytes):
;   - index 4095 (byte 16380):      expected as `str w2, [x0, #16380]`.
;   - register index:               `str w2, [x0, w1, sxtw #2]`.
;   - index -64 (byte -256):        `stur w2, [x0, #-256]`.
;   - index 297984 (byte 0x123000): address formed with
;                                   `add ..., #291, lsl #12`, then `str`.
; All expected sequences use ordinary word stores with no barriers; the
; configurations differ only in where the address-forming add is placed.
define void @atomic_store_relaxed_32(ptr %p, i32 %off32, i32 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    str w2, [x0, #16380]
; CHECK-NOLSE-O1-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-NOLSE-O1-NEXT:    stur w2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    str w2, [x8]
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    str w2, [x0, #16380]
; CHECK-OUTLINE-O1-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-OUTLINE-O1-NEXT:    stur w2, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    str w2, [x8]
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    str w2, [x0, #16380]
; CHECK-NOLSE-O0-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-NOLSE-O0-NEXT:    stur w2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    str w2, [x8]
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    str w2, [x0, #16380]
; CHECK-OUTLINE-O0-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-OUTLINE-O0-NEXT:    stur w2, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    str w2, [x8]
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    str w2, [x0, #16380]
; CHECK-LSE-O1-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-LSE-O1-NEXT:    stur w2, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    str w2, [x8]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    str w2, [x0, #16380]
; CHECK-LSE-O0-NEXT:    str w2, [x0, w1, sxtw #2]
; CHECK-LSE-O0-NEXT:    stur w2, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    str w2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  %ptr_unsigned = getelementptr i32, ptr %p, i32 4095
  store atomic i32 %val, ptr %ptr_unsigned monotonic, align 4

  %ptr_regoff = getelementptr i32, ptr %p, i32 %off32
  store atomic i32 %val, ptr %ptr_regoff unordered, align 4

  %ptr_unscaled = getelementptr i32, ptr %p, i32 -64
  store atomic i32 %val, ptr %ptr_unscaled monotonic, align 4

  %ptr_random = getelementptr i32, ptr %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
  store atomic i32 %val, ptr %ptr_random unordered, align 4

  ret void
}

; Relaxed (monotonic / unordered) i64 atomic stores through four differently
; formed addresses. The assertions below expect every store to be emitted as
; an ordinary (non-release) store whose addressing mode matches the address:
;   - element 4095 of %p    : str with unsigned immediate offset #32760
;   - element %off32 of %p  : str with sign-extended register offset, sxtw #3
;   - element -32 of %p     : stur with immediate offset #-256
;   - element 148992 of %p  : add #291, lsl #12 into x8, then str to [x8]
; The only difference between configurations in the expected output is where
; the add that materializes the last address is placed.
define void @atomic_store_relaxed_64(ptr %p, i32 %off32, i64 %val) #0 {
; CHECK-NOLSE-O1-LABEL: atomic_store_relaxed_64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O1-NEXT:    str x2, [x0, #32760]
; CHECK-NOLSE-O1-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-NOLSE-O1-NEXT:    stur x2, [x0, #-256]
; CHECK-NOLSE-O1-NEXT:    str x2, [x8]
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomic_store_relaxed_64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O1-NEXT:    str x2, [x0, #32760]
; CHECK-OUTLINE-O1-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-OUTLINE-O1-NEXT:    stur x2, [x0, #-256]
; CHECK-OUTLINE-O1-NEXT:    str x2, [x8]
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomic_store_relaxed_64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    str x2, [x0, #32760]
; CHECK-NOLSE-O0-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-NOLSE-O0-NEXT:    stur x2, [x0, #-256]
; CHECK-NOLSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-NOLSE-O0-NEXT:    str x2, [x8]
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomic_store_relaxed_64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    str x2, [x0, #32760]
; CHECK-OUTLINE-O0-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-OUTLINE-O0-NEXT:    stur x2, [x0, #-256]
; CHECK-OUTLINE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-OUTLINE-O0-NEXT:    str x2, [x8]
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomic_store_relaxed_64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    str x2, [x0, #32760]
; CHECK-LSE-O1-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-LSE-O1-NEXT:    stur x2, [x0, #-256]
; CHECK-LSE-O1-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT:    str x2, [x8]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomic_store_relaxed_64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    str x2, [x0, #32760]
; CHECK-LSE-O0-NEXT:    str x2, [x0, w1, sxtw #3]
; CHECK-LSE-O0-NEXT:    stur x2, [x0, #-256]
; CHECK-LSE-O0-NEXT:    add x8, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O0-NEXT:    str x2, [x8]
; CHECK-LSE-O0-NEXT:    ret
  ; 4095 * 8 = 32760 bytes: expected as a scaled unsigned immediate offset.
  %ptr_unsigned = getelementptr i64, ptr %p, i32 4095
  store atomic i64 %val, ptr %ptr_unsigned monotonic, align 8

  ; Variable i32 index: expected as a sign-extended, scaled register offset.
  %ptr_regoff = getelementptr i64, ptr %p, i32 %off32
  store atomic i64 %val, ptr %ptr_regoff unordered, align 8

  ; -32 * 8 = -256 bytes: expected as an unscaled (stur) negative offset.
  %ptr_unscaled = getelementptr i64, ptr %p, i32 -32
  store atomic i64 %val, ptr %ptr_unscaled monotonic, align 8

  ; 148992 * 8 = 1191936 bytes: expected to be formed by a separate add.
  %ptr_random = getelementptr i64, ptr %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
  store atomic i64 %val, ptr %ptr_random unordered, align 8

  ret void
}

; Atomic narrow loads whose results are zero-extended to i32 and summed.
; Expected output: the acquire i8 load becomes ldarb (ldaprb in the
; -mcpu=apple-a13 runs), the unordered i16 load becomes a plain ldrh, and the
; i8 zero-extension is folded into the add as a uxtb operand. No separate
; extension instruction is expected for the i16 value.
define i32 @load_zext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O1-LABEL: load_zext:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldarb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x1]
; CHECK-NOLSE-O1-NEXT:    add w0, w9, w8, uxtb
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: load_zext:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldarb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    ldrh w9, [x1]
; CHECK-OUTLINE-O1-NEXT:    add w0, w9, w8, uxtb
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: load_zext:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldarb w9, [x0]
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x1]
; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: load_zext:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldarb w9, [x0]
; CHECK-OUTLINE-O0-NEXT:    ldrh w8, [x1]
; CHECK-OUTLINE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: load_zext:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaprb w8, [x0]
; CHECK-LSE-O1-NEXT:    ldrh w9, [x1]
; CHECK-LSE-O1-NEXT:    add w0, w9, w8, uxtb
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: load_zext:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaprb w9, [x0]
; CHECK-LSE-O0-NEXT:    ldrh w8, [x1]
; CHECK-LSE-O0-NEXT:    add w0, w8, w9, uxtb
; CHECK-LSE-O0-NEXT:    ret
  ; Acquire byte load, widened with zext.
  %val1.8 = load atomic i8, ptr %p8 acquire, align 1
  %val1 = zext i8 %val1.8 to i32

  ; Unordered halfword load, widened with zext.
  %val2.16 = load atomic i16, ptr %p16 unordered, align 2
  %val2 = zext i16 %val2.16 to i32

  %res = add i32 %val1, %val2
  ret i32 %res
}

; Ordered atomic loads returned together as a { i32, i64 } pair.
; Expected output: the seq_cst i32 load is always emitted as ldar, while the
; acquire i64 load is emitted as ldar in the default and outline-atomics runs
; and as ldapr in the -mcpu=apple-a13 runs.
define { i32, i64 } @load_acq(ptr %p32, ptr %p64) {
; CHECK-NOLSE-LABEL: load_acq:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar w0, [x0]
; CHECK-NOLSE-NEXT:    ldar x1, [x1]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: load_acq:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar w0, [x0]
; CHECK-OUTLINE-NEXT:    ldar x1, [x1]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: load_acq:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar w0, [x0]
; CHECK-LSE-O1-NEXT:    ldapr x1, [x1]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: load_acq:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar w0, [x0]
; CHECK-LSE-O0-NEXT:    ldapr x1, [x1]
; CHECK-LSE-O0-NEXT:    ret
  ; Sequentially consistent 32-bit load goes into field 0. The aggregate
  ; placeholder is poison rather than the deprecated undef; both fields are
  ; overwritten before the value is returned.
  %val32 = load atomic i32, ptr %p32 seq_cst, align 4
  %tmp = insertvalue { i32, i64 } poison, i32 %val32, 0

  ; Acquire 64-bit load goes into field 1.
  %val64 = load atomic i64, ptr %p64 acquire, align 8
  %res = insertvalue { i32, i64 } %tmp, i64 %val64, 1

  ret { i32, i64 } %res
}

; Atomic narrow loads whose results are sign-extended to i32 and summed.
; Expected output: the acquire i8 load becomes ldarb (ldaprb in the
; -mcpu=apple-a13 runs) and the unordered i16 load becomes a plain ldrh. The
; i16 sign-extension is expected as a separate sxth after the load, and the
; i8 sign-extension is folded into the add as an sxtb operand.
define i32 @load_sext(ptr %p8, ptr %p16) {
; CHECK-NOLSE-O1-LABEL: load_sext:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ldarb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    ldrh w9, [x1]
; CHECK-NOLSE-O1-NEXT:    sxth w9, w9
; CHECK-NOLSE-O1-NEXT:    add w0, w9, w8, sxtb
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: load_sext:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    ldarb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    ldrh w9, [x1]
; CHECK-OUTLINE-O1-NEXT:    sxth w9, w9
; CHECK-OUTLINE-O1-NEXT:    add w0, w9, w8, sxtb
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: load_sext:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    ldarb w9, [x0]
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x1]
; CHECK-NOLSE-O0-NEXT:    sxth w8, w8
; CHECK-NOLSE-O0-NEXT:    add w0, w8, w9, sxtb
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: load_sext:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    ldarb w9, [x0]
; CHECK-OUTLINE-O0-NEXT:    ldrh w8, [x1]
; CHECK-OUTLINE-O0-NEXT:    sxth w8, w8
; CHECK-OUTLINE-O0-NEXT:    add w0, w8, w9, sxtb
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: load_sext:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaprb w8, [x0]
; CHECK-LSE-O1-NEXT:    ldrh w9, [x1]
; CHECK-LSE-O1-NEXT:    sxth w9, w9
; CHECK-LSE-O1-NEXT:    add w0, w9, w8, sxtb
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: load_sext:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaprb w9, [x0]
; CHECK-LSE-O0-NEXT:    ldrh w8, [x1]
; CHECK-LSE-O0-NEXT:    sxth w8, w8
; CHECK-LSE-O0-NEXT:    add w0, w8, w9, sxtb
; CHECK-LSE-O0-NEXT:    ret
  ; Acquire byte load, widened with sext.
  %val1.8 = load atomic i8, ptr %p8 acquire, align 1
  %val1 = sext i8 %val1.8 to i32

  ; Unordered halfword load, widened with sext.
  %val2.16 = load atomic i16, ptr %p16 unordered, align 2
  %val2 = sext i16 %val2.16 to i32

  %res = add i32 %val1, %val2
  ret i32 %res
}

; Atomic stores of truncated values. Expected output in every configuration:
; the seq_cst i8 store becomes stlrb and the monotonic i16 store becomes a
; plain strh, both storing directly from w0 with no separate instruction for
; the truncation.
define void @store_trunc(i32 %val, ptr %p8, ptr %p16) {
; CHECK-NOLSE-LABEL: store_trunc:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    stlrb w0, [x1]
; CHECK-NOLSE-NEXT:    strh w0, [x2]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: store_trunc:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    stlrb w0, [x1]
; CHECK-OUTLINE-NEXT:    strh w0, [x2]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: store_trunc:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    stlrb w0, [x1]
; CHECK-LSE-O1-NEXT:    strh w0, [x2]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: store_trunc:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    stlrb w0, [x1]
; CHECK-LSE-O0-NEXT:    strh w0, [x2]
; CHECK-LSE-O0-NEXT:    ret
  ; Low 8 bits of %val, stored with seq_cst ordering.
  %val8 = trunc i32 %val to i8
  store atomic i8 %val8, ptr %p8 seq_cst, align 1

  ; Low 16 bits of %val, stored with monotonic ordering.
  %val16 = trunc i32 %val to i16
  store atomic i16 %val16, ptr %p16 monotonic, align 2

  ret void
}

; i8 atomicrmw add with seq_cst ordering. Expected output per configuration:
;   - default, optimized       : a single ldaxrb / add / stlxrb retry loop
;   - outline atomics          : a call to ___aarch64_ldadd1_acq_rel with the
;                                operand in w0 and the pointer in x1
;   - default, -O0             : an outer loop that recomputes the sum from the
;                                last observed byte, around an inner
;                                ldaxrb / cmp / stlxrb loop
;   - -mcpu=apple-a13 (either) : a single ldaddalb
define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB27_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    add w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB27_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd1_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB27_1
; CHECK-NOLSE-O0-NEXT:  LBB27_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB27_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add w12, w8, w10, uxth
; CHECK-NOLSE-O0-NEXT:  LBB27_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB27_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB27_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB27_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB27_2
; CHECK-NOLSE-O0-NEXT:  LBB27_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB27_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB27_1
; CHECK-NOLSE-O0-NEXT:    b LBB27_5
; CHECK-NOLSE-O0-NEXT:  LBB27_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd1_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaddalb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_add_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaddalb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  ; The value returned is the atomicrmw result itself.
  %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
  ret i8 %res
}

; i8 atomicrmw xchg with monotonic ordering. Expected output per configuration:
;   - default, optimized       : a single ldxrb / stxrb retry loop
;   - outline atomics          : a call to ___aarch64_swp1_relax with the
;                                operand in w0 and the pointer in x1
;   - default, -O0             : an outer loop around an inner
;                                ldaxrb / cmp / stlxrb loop that stores %rhs
;   - -mcpu=apple-a13 (either) : a single swpb
define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NOLSE-O1-NEXT:  LBB28_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    stxrb w9, w1, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB28_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_swp1_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB28_1
; CHECK-NOLSE-O0-NEXT:  LBB28_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB28_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:  LBB28_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB28_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB28_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB28_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB28_2
; CHECK-NOLSE-O0-NEXT:  LBB28_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB28_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB28_1
; CHECK-NOLSE-O0-NEXT:    b LBB28_5
; CHECK-NOLSE-O0-NEXT:  LBB28_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_swp1_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    swpb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    swpb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  ; The value returned is the atomicrmw result itself.
  %res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic
  ret i8 %res
}

; i8 atomicrmw sub with acquire ordering. Expected output per configuration:
;   - default, optimized       : a single ldaxrb / sub / stxrb retry loop
;   - outline atomics          : the operand is negated (neg at -O1, subs from
;                                wzr at -O0) and passed to
;                                ___aarch64_ldadd1_acq
;   - default, -O0             : an outer loop that recomputes the difference
;                                from the last observed byte, around an inner
;                                ldaxrb / cmp / stlxrb loop
;   - -mcpu=apple-a13 (either) : neg of the operand followed by ldaddab
define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB29_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sub w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB29_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    neg w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd1_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB29_1
; CHECK-NOLSE-O0-NEXT:  LBB29_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB29_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w12, w10, w8
; CHECK-NOLSE-O0-NEXT:  LBB29_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB29_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB29_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB29_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB29_2
; CHECK-NOLSE-O0-NEXT:  LBB29_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB29_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB29_1
; CHECK-NOLSE-O0-NEXT:    b LBB29_5
; CHECK-NOLSE-O0-NEXT:  LBB29_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, wzr
; CHECK-OUTLINE-O0-NEXT:    subs w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd1_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    neg w8, w1
; CHECK-LSE-O1-NEXT:    ldaddab w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_sub_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    neg w8, w1
; CHECK-LSE-O0-NEXT:    ldaddab w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  ; The value returned is the atomicrmw result itself.
  %res = atomicrmw sub ptr %ptr, i8 %rhs acquire
  ret i8 %res
}

; i8 atomicrmw and with release ordering. Expected output per configuration:
;   - default, optimized       : a single ldxrb / and / stlxrb retry loop
;   - outline atomics          : the operand is inverted (mvn at -O1, eor with
;                                #-1 at -O0) and passed to
;                                ___aarch64_ldclr1_rel
;   - default, -O0             : an outer loop that recomputes the AND from the
;                                last observed byte, around an inner
;                                ldaxrb / cmp / stlxrb loop
;   - -mcpu=apple-a13 (either) : mvn of the operand followed by ldclrlb
define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB30_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB30_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mvn w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldclr1_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB30_1
; CHECK-NOLSE-O0-NEXT:  LBB30_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB30_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w12, w10, w8
; CHECK-NOLSE-O0-NEXT:  LBB30_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB30_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB30_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB30_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB30_2
; CHECK-NOLSE-O0-NEXT:  LBB30_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB30_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB30_1
; CHECK-NOLSE-O0-NEXT:    b LBB30_5
; CHECK-NOLSE-O0-NEXT:  LBB30_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, #-1 ; =0xffffffff
; CHECK-OUTLINE-O0-NEXT:    eor w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldclr1_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mvn w8, w1
; CHECK-LSE-O1-NEXT:    ldclrlb w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_and_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mvn w8, w1
; CHECK-LSE-O0-NEXT:    ldclrlb w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  ; The value returned is the atomicrmw result itself.
  %res = atomicrmw and ptr %ptr, i8 %rhs release
  ret i8 %res
}

; i8 atomicrmw or with seq_cst ordering. Expected output per configuration:
;   - default, optimized       : a single ldaxrb / orr / stlxrb retry loop
;   - outline atomics          : a call to ___aarch64_ldset1_acq_rel with the
;                                operand in w0 and the pointer in x1
;   - default, -O0             : an outer loop that recomputes the OR from the
;                                last observed byte, around an inner
;                                ldaxrb / cmp / stlxrb loop
;   - -mcpu=apple-a13 (either) : a single ldsetalb
define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB31_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB31_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldset1_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB31_1
; CHECK-NOLSE-O0-NEXT:  LBB31_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB31_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    orr w12, w10, w8
; CHECK-NOLSE-O0-NEXT:  LBB31_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB31_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB31_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB31_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB31_2
; CHECK-NOLSE-O0-NEXT:  LBB31_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB31_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB31_1
; CHECK-NOLSE-O0-NEXT:    b LBB31_5
; CHECK-NOLSE-O0-NEXT:  LBB31_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldset1_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsetalb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_or_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsetalb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  ; The value returned is the atomicrmw result itself.
  %res = atomicrmw or ptr %ptr, i8 %rhs seq_cst
  ret i8 %res
}

; i8 atomicrmw xor with monotonic ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - no LSE, default opt level: a single ldxrb / eor / stxrb retry loop;
;  - outline atomics (either opt level): a call to ___aarch64_ldeor1_relax;
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrb / stlxrb
;    compare-and-swap loop;
;  - apple-a13 runs: a single ldeorb.
define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB32_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    eor w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB32_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldeor1_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB32_1
; CHECK-NOLSE-O0-NEXT:  LBB32_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB32_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    eor w12, w10, w8
; CHECK-NOLSE-O0-NEXT:  LBB32_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB32_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB32_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB32_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB32_2
; CHECK-NOLSE-O0-NEXT:  LBB32_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB32_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB32_1
; CHECK-NOLSE-O0-NEXT:    b LBB32_5
; CHECK-NOLSE-O0-NEXT:  LBB32_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldeor1_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldeorb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xor_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldeorb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xor ptr %ptr, i8 %rhs monotonic
  ret i8 %res
}

; i8 atomicrmw min (signed) with acquire ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - default opt level, with or without outline atomics: the same inline
;    ldaxrb / sxtb+cmp / csel / stxrb retry loop (no helper call is expected);
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrb / stlxrb
;    compare-and-swap loop;
;  - outline atomics, -O0: a retry loop around ___aarch64_cas1_acq;
;  - apple-a13 runs: a single ldsminab.
; The select condition differs between opt levels (lt vs. le); the two only
; disagree when both operands compare equal, where either choice yields the
; same value.
define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB33_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sxtb w9, w8
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxtb
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-NOLSE-O1-NEXT:    stxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB33_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB33_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    sxtb w9, w8
; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxtb
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-OUTLINE-O1-NEXT:    stxrb w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB33_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB33_1
; CHECK-NOLSE-O0-NEXT:  LBB33_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB33_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    sxtb w9, w10
; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, sxtb
; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, le
; CHECK-NOLSE-O0-NEXT:  LBB33_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB33_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB33_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB33_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB33_2
; CHECK-NOLSE-O0-NEXT:  LBB33_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB33_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB33_1
; CHECK-NOLSE-O0-NEXT:    b LBB33_5
; CHECK-NOLSE-O0-NEXT:  LBB33_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrb w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB33_1
; CHECK-OUTLINE-O0-NEXT:  LBB33_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    sxtb w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxtb
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_acq
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB33_1
; CHECK-OUTLINE-O0-NEXT:    b LBB33_2
; CHECK-OUTLINE-O0-NEXT:  LBB33_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsminab w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_min_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsminab w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw min ptr %ptr, i8 %rhs acquire
  ret i8 %res
}

; i8 atomicrmw max (signed) with release ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - default opt level, with or without outline atomics: the same inline
;    ldxrb / sxtb+cmp / csel gt / stlxrb retry loop (no helper call is expected);
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrb / stlxrb
;    compare-and-swap loop;
;  - outline atomics, -O0: a retry loop around ___aarch64_cas1_rel;
;  - apple-a13 runs: a single ldsmaxlb.
define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB34_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sxtb w9, w8
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxtb
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-NOLSE-O1-NEXT:    stlxrb w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB34_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB34_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    sxtb w9, w8
; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxtb
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-OUTLINE-O1-NEXT:    stlxrb w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB34_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB34_1
; CHECK-NOLSE-O0-NEXT:  LBB34_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB34_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    sxtb w9, w10
; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, sxtb
; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, gt
; CHECK-NOLSE-O0-NEXT:  LBB34_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB34_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB34_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB34_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB34_2
; CHECK-NOLSE-O0-NEXT:  LBB34_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB34_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB34_1
; CHECK-NOLSE-O0-NEXT:    b LBB34_5
; CHECK-NOLSE-O0-NEXT:  LBB34_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrb w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB34_1
; CHECK-OUTLINE-O0-NEXT:  LBB34_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    sxtb w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxtb
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB34_1
; CHECK-OUTLINE-O0-NEXT:    b LBB34_2
; CHECK-OUTLINE-O0-NEXT:  LBB34_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmaxlb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_max_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmaxlb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw max ptr %ptr, i8 %rhs release
  ret i8 %res
}

; i8 atomicrmw umin (unsigned) with seq_cst ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - default opt level, with or without outline atomics: %rhs is masked to
;    8 bits once before the loop, followed by the same inline
;    ldaxrb / and+cmp / csel lo / stlxrb retry loop (no helper call is expected);
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrb / stlxrb
;    compare-and-swap loop;
;  - outline atomics, -O0: a retry loop around ___aarch64_cas1_acq_rel;
;  - apple-a13 runs: a single lduminalb.
; The select condition differs between opt levels (lo vs. ls); the two only
; disagree when both operands compare equal, where either choice yields the
; same value.
define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xff
; CHECK-NOLSE-O1-NEXT:  LBB35_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w8, w8, #0xff
; CHECK-NOLSE-O1-NEXT:    cmp w8, w9
; CHECK-NOLSE-O1-NEXT:    csel w10, w8, w9, lo
; CHECK-NOLSE-O1-NEXT:    stlxrb w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB35_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    and w9, w1, #0xff
; CHECK-OUTLINE-O1-NEXT:  LBB35_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxrb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and w8, w8, #0xff
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w9
; CHECK-OUTLINE-O1-NEXT:    csel w10, w8, w9, lo
; CHECK-OUTLINE-O1-NEXT:    stlxrb w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB35_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB35_1
; CHECK-NOLSE-O0-NEXT:  LBB35_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB35_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w9, w10, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, uxtb
; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, ls
; CHECK-NOLSE-O0-NEXT:  LBB35_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB35_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB35_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB35_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB35_2
; CHECK-NOLSE-O0-NEXT:  LBB35_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB35_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB35_1
; CHECK-NOLSE-O0-NEXT:    b LBB35_5
; CHECK-NOLSE-O0-NEXT:  LBB35_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrb w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB35_1
; CHECK-OUTLINE-O0-NEXT:  LBB35_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w9, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxtb
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB35_1
; CHECK-OUTLINE-O0-NEXT:    b LBB35_2
; CHECK-OUTLINE-O0-NEXT:  LBB35_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    lduminalb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umin_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    lduminalb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umin ptr %ptr, i8 %rhs seq_cst
  ret i8 %res
}

; i8 atomicrmw umax (unsigned) with monotonic ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - default opt level, with or without outline atomics: %rhs is masked to
;    8 bits once before the loop, followed by the same inline
;    ldxrb / and+cmp / csel hi / stxrb retry loop (no helper call is expected);
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrb / stlxrb
;    compare-and-swap loop;
;  - outline atomics, -O0: a retry loop around ___aarch64_cas1_relax;
;  - apple-a13 runs: a single ldumaxb.
define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xff
; CHECK-NOLSE-O1-NEXT:  LBB36_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w8, w8, #0xff
; CHECK-NOLSE-O1-NEXT:    cmp w8, w9
; CHECK-NOLSE-O1-NEXT:    csel w10, w8, w9, hi
; CHECK-NOLSE-O1-NEXT:    stxrb w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB36_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    and w9, w1, #0xff
; CHECK-OUTLINE-O1-NEXT:  LBB36_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxrb w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and w8, w8, #0xff
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w9
; CHECK-OUTLINE-O1-NEXT:    csel w10, w8, w9, hi
; CHECK-OUTLINE-O1-NEXT:    stxrb w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB36_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrb w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB36_1
; CHECK-NOLSE-O0-NEXT:  LBB36_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB36_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w10, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w9, w10, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w9, w9, w8, uxtb
; CHECK-NOLSE-O0-NEXT:    csel w12, w10, w8, hi
; CHECK-NOLSE-O0-NEXT:  LBB36_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB36_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrb w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB36_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB36_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB36_2
; CHECK-NOLSE-O0-NEXT:  LBB36_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB36_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    and w8, w9, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w10, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB36_1
; CHECK-NOLSE-O0-NEXT:    b LBB36_5
; CHECK-NOLSE-O0-NEXT:  LBB36_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrb w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB36_1
; CHECK-OUTLINE-O0-NEXT:  LBB36_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w9, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxtb
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w9, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w9, uxtb
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB36_1
; CHECK-OUTLINE-O0-NEXT:    b LBB36_2
; CHECK-OUTLINE-O0-NEXT:  LBB36_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldumaxb w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umax_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldumaxb w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umax ptr %ptr, i8 %rhs monotonic
  ret i8 %res
}

; i16 atomicrmw add with seq_cst ordering; returns the value loaded before the update.
; Per the autogenerated assertions below:
;  - no LSE, default opt level: a single ldaxrh / add / stlxrh retry loop;
;  - outline atomics (either opt level): a call to ___aarch64_ldadd2_acq_rel;
;  - no LSE, -O0: an outer retry loop wrapping an ldaxrh / stlxrh
;    compare-and-swap loop;
;  - apple-a13 runs: a single ldaddalh.
define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB37_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    add w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB37_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd2_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB37_1
; CHECK-NOLSE-O0-NEXT:  LBB37_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB37_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add w12, w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:  LBB37_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB37_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB37_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB37_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB37_2
; CHECK-NOLSE-O0-NEXT:  LBB37_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB37_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB37_1
; CHECK-NOLSE-O0-NEXT:    b LBB37_5
; CHECK-NOLSE-O0-NEXT:  LBB37_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd2_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaddalh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_add_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaddalh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
  ret i16 %res
}

define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-NOLSE-O1-NEXT:  LBB38_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    stxrh w9, w1, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB38_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_swp2_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB38_1
; CHECK-NOLSE-O0-NEXT:  LBB38_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB38_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:  LBB38_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB38_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB38_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB38_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB38_2
; CHECK-NOLSE-O0-NEXT:  LBB38_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB38_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB38_1
; CHECK-NOLSE-O0-NEXT:    b LBB38_5
; CHECK-NOLSE-O0-NEXT:  LBB38_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_swp2_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    swph w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    swph w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic
  ret i16 %res
}

; 16-bit `atomicrmw sub` with acquire ordering; the function returns the
; result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldaxrh / sub / stxrh retry loop.
;   * outline atomics: the operand is negated and handed to
;     ___aarch64_ldadd2_acq (optimized uses `neg`, -O0 subtracts from wzr).
;   * no LSE, -O0: the new value is computed in an outer loop, and a nested
;     ldaxrh / cmp / stlxrh loop stores it only if memory still holds the
;     previously loaded value; the outer loop repeats until that comparison
;     succeeds.
;   * LSE (-mcpu=apple-a13): `neg` followed by ldaddah.
define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB39_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sub w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB39_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    neg w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd2_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB39_1
; CHECK-NOLSE-O0-NEXT:  LBB39_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB39_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB39_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB39_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB39_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB39_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB39_2
; CHECK-NOLSE-O0-NEXT:  LBB39_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB39_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB39_1
; CHECK-NOLSE-O0-NEXT:    b LBB39_5
; CHECK-NOLSE-O0-NEXT:  LBB39_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, wzr
; CHECK-OUTLINE-O0-NEXT:    subs w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd2_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    neg w8, w1
; CHECK-LSE-O1-NEXT:    ldaddah w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_sub_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    neg w8, w1
; CHECK-LSE-O0-NEXT:    ldaddah w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw sub ptr %ptr, i16 %rhs acquire
  ret i16 %res
}

; 16-bit `atomicrmw and` with release ordering; the function returns the
; result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldxrh / and / stlxrh retry loop.
;   * outline atomics: the operand is bitwise-inverted and handed to
;     ___aarch64_ldclr2_rel (optimized uses `mvn`, -O0 uses eor with an
;     all-ones register).
;   * no LSE, -O0: the new value is computed in an outer loop, and a nested
;     ldaxrh / cmp / stlxrh loop stores it only if memory still holds the
;     previously loaded value; the outer loop repeats until that comparison
;     succeeds.
;   * LSE (-mcpu=apple-a13): `mvn` followed by ldclrlh.
define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB40_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB40_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mvn w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldclr2_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB40_1
; CHECK-NOLSE-O0-NEXT:  LBB40_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB40_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB40_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB40_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB40_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB40_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB40_2
; CHECK-NOLSE-O0-NEXT:  LBB40_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB40_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB40_1
; CHECK-NOLSE-O0-NEXT:    b LBB40_5
; CHECK-NOLSE-O0-NEXT:  LBB40_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, #-1 ; =0xffffffff
; CHECK-OUTLINE-O0-NEXT:    eor w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldclr2_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mvn w8, w1
; CHECK-LSE-O1-NEXT:    ldclrlh w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_and_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mvn w8, w1
; CHECK-LSE-O0-NEXT:    ldclrlh w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw and ptr %ptr, i16 %rhs release
  ret i16 %res
}

; 16-bit `atomicrmw or` with seq_cst ordering; the function returns the
; result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldaxrh / orr / stlxrh retry loop.
;   * outline atomics: the operand is passed unchanged to
;     ___aarch64_ldset2_acq_rel.
;   * no LSE, -O0: the new value is computed in an outer loop, and a nested
;     ldaxrh / cmp / stlxrh loop stores it only if memory still holds the
;     previously loaded value; the outer loop repeats until that comparison
;     succeeds.
;   * LSE (-mcpu=apple-a13): a single ldsetalh.
define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB41_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB41_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldset2_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB41_1
; CHECK-NOLSE-O0-NEXT:  LBB41_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB41_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB41_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB41_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB41_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB41_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB41_2
; CHECK-NOLSE-O0-NEXT:  LBB41_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB41_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB41_1
; CHECK-NOLSE-O0-NEXT:    b LBB41_5
; CHECK-NOLSE-O0-NEXT:  LBB41_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldset2_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsetalh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_or_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsetalh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw or ptr %ptr, i16 %rhs seq_cst
  ret i16 %res
}

; 16-bit `atomicrmw xor` with monotonic ordering; the function returns the
; result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldxrh / eor / stxrh retry loop.
;   * outline atomics: the operand is passed unchanged to
;     ___aarch64_ldeor2_relax.
;   * no LSE, -O0: the new value is computed in an outer loop, and a nested
;     ldaxrh / cmp / stlxrh loop stores it only if memory still holds the
;     previously loaded value; the outer loop repeats until that comparison
;     succeeds.
;   * LSE (-mcpu=apple-a13): a single ldeorh.
define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB42_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    eor w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB42_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldeor2_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB42_1
; CHECK-NOLSE-O0-NEXT:  LBB42_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB42_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    eor w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB42_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB42_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB42_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB42_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB42_2
; CHECK-NOLSE-O0-NEXT:  LBB42_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB42_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB42_1
; CHECK-NOLSE-O0-NEXT:    b LBB42_5
; CHECK-NOLSE-O0-NEXT:  LBB42_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldeor2_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldeorh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xor_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldeorh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xor ptr %ptr, i16 %rhs monotonic
  ret i16 %res
}

; 16-bit signed `atomicrmw min` with acquire ordering; the function returns
; the result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldaxrh / sxth / cmp / csel(lt) / stxrh retry
;     loop. The optimized outline-atomics run expects the same inline loop
;     and no helper call.
;   * no LSE, -O0: the minimum is selected with csel(le) in an outer loop,
;     and a nested ldaxrh / cmp / stlxrh loop stores it only if memory still
;     holds the previously loaded value.
;   * outline atomics, -O0: the same outer loop, but the conditional store
;     is a call to ___aarch64_cas2_acq; the loop repeats until the value
;     returned by the helper equals the expected one.
;   * LSE (-mcpu=apple-a13): a single ldsminah.
define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB43_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sxth w9, w8
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxth
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-NOLSE-O1-NEXT:    stxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB43_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB43_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    sxth w9, w8
; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxth
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-OUTLINE-O1-NEXT:    stxrh w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB43_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB43_1
; CHECK-NOLSE-O0-NEXT:  LBB43_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB43_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    sxth w10, w8
; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, sxth
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, le
; CHECK-NOLSE-O0-NEXT:  LBB43_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB43_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB43_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB43_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB43_2
; CHECK-NOLSE-O0-NEXT:  LBB43_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB43_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB43_1
; CHECK-NOLSE-O0-NEXT:    b LBB43_5
; CHECK-NOLSE-O0-NEXT:  LBB43_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrh w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB43_1
; CHECK-OUTLINE-O0-NEXT:  LBB43_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    sxth w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxth
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_acq
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB43_1
; CHECK-OUTLINE-O0-NEXT:    b LBB43_2
; CHECK-OUTLINE-O0-NEXT:  LBB43_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsminah w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_min_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsminah w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw min ptr %ptr, i16 %rhs acquire
  ret i16 %res
}

; 16-bit signed `atomicrmw max` with release ordering; the function returns
; the result of the atomicrmw. Expected lowerings recorded below:
;   * no LSE, optimized: one ldxrh / sxth / cmp / csel(gt) / stlxrh retry
;     loop. The optimized outline-atomics run expects the same inline loop
;     and no helper call.
;   * no LSE, -O0: the maximum is selected with csel(gt) in an outer loop,
;     and a nested ldaxrh / cmp / stlxrh loop stores it only if memory still
;     holds the previously loaded value.
;   * outline atomics, -O0: the same outer loop, but the conditional store
;     is a call to ___aarch64_cas2_rel; the loop repeats until the value
;     returned by the helper equals the expected one.
;   * LSE (-mcpu=apple-a13): a single ldsmaxlh.
define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB44_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sxth w9, w8
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxth
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-NOLSE-O1-NEXT:    stlxrh w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB44_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB44_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    sxth w9, w8
; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxth
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-OUTLINE-O1-NEXT:    stlxrh w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB44_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB44_1
; CHECK-NOLSE-O0-NEXT:  LBB44_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB44_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    sxth w10, w8
; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, sxth
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, gt
; CHECK-NOLSE-O0-NEXT:  LBB44_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB44_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB44_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB44_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB44_2
; CHECK-NOLSE-O0-NEXT:  LBB44_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB44_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB44_1
; CHECK-NOLSE-O0-NEXT:    b LBB44_5
; CHECK-NOLSE-O0-NEXT:  LBB44_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrh w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB44_1
; CHECK-OUTLINE-O0-NEXT:  LBB44_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    sxth w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, sxth
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB44_1
; CHECK-OUTLINE-O0-NEXT:    b LBB44_2
; CHECK-OUTLINE-O0-NEXT:  LBB44_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmaxlh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_max_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmaxlh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw max ptr %ptr, i16 %rhs release
  ret i16 %res
}

; Unsigned-minimum atomicrmw on an i16 with seq_cst ordering; the function
; returns the atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldaxrh/stlxrh retry loop with the new value
;    picked by cmp + csel; the loaded value is returned in w0.
;  * no LSE, -O0: compare-and-swap retry loop, either inline
;    (ldaxrh/stlxrh) or via a call to ___aarch64_cas2_acq_rel when
;    outline-atomics is enabled.
;  * LSE (apple-a13): a single lduminalh.
define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xffff
; CHECK-NOLSE-O1-NEXT:  LBB45_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w8, w8, #0xffff
; CHECK-NOLSE-O1-NEXT:    cmp w8, w9
; CHECK-NOLSE-O1-NEXT:    csel w10, w8, w9, lo
; CHECK-NOLSE-O1-NEXT:    stlxrh w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB45_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    and w9, w1, #0xffff
; CHECK-OUTLINE-O1-NEXT:  LBB45_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxrh w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and w8, w8, #0xffff
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w9
; CHECK-OUTLINE-O1-NEXT:    csel w10, w8, w9, lo
; CHECK-OUTLINE-O1-NEXT:    stlxrh w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB45_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB45_1
; CHECK-NOLSE-O0-NEXT:  LBB45_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB45_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    uxth w10, w8
; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, uxth
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, ls
; CHECK-NOLSE-O0-NEXT:  LBB45_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB45_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB45_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB45_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB45_2
; CHECK-NOLSE-O0-NEXT:  LBB45_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB45_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB45_1
; CHECK-NOLSE-O0-NEXT:    b LBB45_5
; CHECK-NOLSE-O0-NEXT:  LBB45_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrh w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB45_1
; CHECK-OUTLINE-O0-NEXT:  LBB45_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxth
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB45_1
; CHECK-OUTLINE-O0-NEXT:    b LBB45_2
; CHECK-OUTLINE-O0-NEXT:  LBB45_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    lduminalh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umin_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    lduminalh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umin ptr %ptr, i16 %rhs seq_cst
  ret i16 %res
}

; Unsigned-maximum atomicrmw on an i16 with monotonic ordering; the function
; returns the atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldxrh/stxrh retry loop (no acquire/release
;    forms) with the new value picked by cmp + csel.
;  * no LSE, -O0: compare-and-swap retry loop; the inline form uses
;    ldaxrh/stlxrh, the outline-atomics form calls ___aarch64_cas2_relax.
;  * LSE (apple-a13): a single ldumaxh.
define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    and w9, w1, #0xffff
; CHECK-NOLSE-O1-NEXT:  LBB46_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w8, w8, #0xffff
; CHECK-NOLSE-O1-NEXT:    cmp w8, w9
; CHECK-NOLSE-O1-NEXT:    csel w10, w8, w9, hi
; CHECK-NOLSE-O1-NEXT:    stxrh w11, w10, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB46_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    and w9, w1, #0xffff
; CHECK-OUTLINE-O1-NEXT:  LBB46_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxrh w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    and w8, w8, #0xffff
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w9
; CHECK-OUTLINE-O1-NEXT:    csel w10, w8, w9, hi
; CHECK-OUTLINE-O1-NEXT:    stxrh w11, w10, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB46_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldrh w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB46_1
; CHECK-NOLSE-O0-NEXT:  LBB46_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB46_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    uxth w10, w8
; CHECK-NOLSE-O0-NEXT:    subs w10, w10, w9, uxth
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, hi
; CHECK-NOLSE-O0-NEXT:  LBB46_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB46_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxrh w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB46_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB46_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxrh w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB46_2
; CHECK-NOLSE-O0-NEXT:  LBB46_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB46_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    uxth w8, w8
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w9, uxth
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB46_1
; CHECK-NOLSE-O0-NEXT:    b LBB46_5
; CHECK-NOLSE-O0-NEXT:  LBB46_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldrh w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB46_1
; CHECK-OUTLINE-O0-NEXT:  LBB46_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w9, w0
; CHECK-OUTLINE-O0-NEXT:    subs w9, w9, w8, uxth
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    uxth w8, w8
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w0, uxth
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB46_1
; CHECK-OUTLINE-O0-NEXT:    b LBB46_2
; CHECK-OUTLINE-O0-NEXT:  LBB46_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldumaxh w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umax_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldumaxh w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umax ptr %ptr, i16 %rhs monotonic
  ret i16 %res
}

; Atomic add on an i32 with seq_cst ordering; the function returns the
; atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldaxr/add/stlxr retry loop; the loaded value
;    is returned in w0.
;  * no LSE, -O0 (inline): compare-and-swap retry loop built from
;    ldaxr/stlxr.
;  * outline-atomics (both opt levels): call to ___aarch64_ldadd4_acq_rel
;    with the operand in w0 and the pointer in x1.
;  * LSE (apple-a13): a single ldaddal.
define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB47_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    add w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB47_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd4_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB47_1
; CHECK-NOLSE-O0-NEXT:  LBB47_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB47_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB47_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB47_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB47_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB47_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB47_2
; CHECK-NOLSE-O0-NEXT:  LBB47_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB47_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB47_1
; CHECK-NOLSE-O0-NEXT:    b LBB47_5
; CHECK-NOLSE-O0-NEXT:  LBB47_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd4_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaddal w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_add_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaddal w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
  ret i32 %res
}

; Atomic exchange on an i32 with monotonic ordering; the function returns the
; atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldxr/stxr retry loop that stores %rhs (w1)
;    directly and leaves the loaded value in w0.
;  * no LSE, -O0 (inline): compare-and-swap retry loop built from
;    ldaxr/stlxr.
;  * outline-atomics (both opt levels): call to ___aarch64_swp4_relax.
;  * LSE (apple-a13): a single swp.
define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:  LBB48_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w0, [x8]
; CHECK-NOLSE-O1-NEXT:    stxr w9, w1, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB48_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_swp4_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB48_1
; CHECK-NOLSE-O0-NEXT:  LBB48_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB48_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w12, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:  LBB48_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB48_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB48_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB48_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB48_2
; CHECK-NOLSE-O0-NEXT:  LBB48_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB48_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB48_1
; CHECK-NOLSE-O0-NEXT:    b LBB48_5
; CHECK-NOLSE-O0-NEXT:  LBB48_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_swp4_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    swp w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    swp w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xchg ptr %ptr, i32 %rhs monotonic
  ret i32 %res
}

; Atomic subtract on an i32 with acquire ordering; the function returns the
; atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldaxr/sub/stxr retry loop (acquire on the
;    load only); the loaded value is returned in w0.
;  * no LSE, -O0 (inline): compare-and-swap retry loop built from
;    ldaxr/stlxr.
;  * outline-atomics (both opt levels): the operand is negated and passed to
;    ___aarch64_ldadd4_acq, i.e. the subtraction is done as an add.
;  * LSE (apple-a13): neg followed by ldadda.
define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB49_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    sub w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB49_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    neg w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd4_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB49_1
; CHECK-NOLSE-O0-NEXT:  LBB49_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB49_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB49_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB49_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB49_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB49_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB49_2
; CHECK-NOLSE-O0-NEXT:  LBB49_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB49_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB49_1
; CHECK-NOLSE-O0-NEXT:    b LBB49_5
; CHECK-NOLSE-O0-NEXT:  LBB49_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, wzr
; CHECK-OUTLINE-O0-NEXT:    subs w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd4_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    neg w8, w1
; CHECK-LSE-O1-NEXT:    ldadda w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_sub_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    neg w8, w1
; CHECK-LSE-O0-NEXT:    ldadda w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw sub ptr %ptr, i32 %rhs acquire
  ret i32 %res
}

; Atomic bitwise AND on an i32 with release ordering; the function returns the
; atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldxr/and/stlxr retry loop (release on the
;    store only); the loaded value is returned in w0.
;  * no LSE, -O0 (inline): compare-and-swap retry loop built from
;    ldaxr/stlxr.
;  * outline-atomics (both opt levels): the operand is bitwise-inverted and
;    passed to ___aarch64_ldclr4_rel, i.e. the AND is done as a bit-clear.
;  * LSE (apple-a13): mvn followed by ldclrl.
define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB50_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    and w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB50_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mvn w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldclr4_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB50_1
; CHECK-NOLSE-O0-NEXT:  LBB50_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB50_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB50_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB50_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB50_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB50_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB50_2
; CHECK-NOLSE-O0-NEXT:  LBB50_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB50_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB50_1
; CHECK-NOLSE-O0-NEXT:    b LBB50_5
; CHECK-NOLSE-O0-NEXT:  LBB50_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w9, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov w8, #-1 ; =0xffffffff
; CHECK-OUTLINE-O0-NEXT:    eor w0, w8, w9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldclr4_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mvn w8, w1
; CHECK-LSE-O1-NEXT:    ldclrl w8, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_and_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mvn w8, w1
; CHECK-LSE-O0-NEXT:    ldclrl w8, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw and ptr %ptr, i32 %rhs release
  ret i32 %res
}

; Atomic bitwise OR on an i32 with seq_cst ordering; the function returns the
; atomicrmw result. Expected lowering per RUN configuration:
;  * no LSE, default opt level: ldaxr/orr/stlxr retry loop; the loaded value
;    is returned in w0.
;  * no LSE, -O0 (inline): compare-and-swap retry loop built from
;    ldaxr/stlxr.
;  * outline-atomics (both opt levels): call to ___aarch64_ldset4_acq_rel
;    with the operand in w0 and the pointer in x1.
;  * LSE (apple-a13): a single ldsetal.
define i32 @atomicrmw_or_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB51_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB51_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldset4_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB51_1
; CHECK-NOLSE-O0-NEXT:  LBB51_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB51_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    orr w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB51_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB51_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB51_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB51_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB51_2
; CHECK-NOLSE-O0-NEXT:  LBB51_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB51_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB51_1
; CHECK-NOLSE-O0-NEXT:    b LBB51_5
; CHECK-NOLSE-O0-NEXT:  LBB51_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldset4_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsetal w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_or_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsetal w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw or ptr %ptr, i32 %rhs seq_cst
  ret i32 %res
}

; Test: i32 `atomicrmw xor` with monotonic ordering; the old value is returned.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldxr / eor / stxr retry loop.
;   - no LSE, -O0: the xor is computed up front, then a compare-and-swap style
;     nested loop (ldaxr / cmp / stlxr) retries until the store succeeds.
;   - outline atomics: a call to ___aarch64_ldeor4_relax (value in w0,
;     pointer in x1).
;   - LSE (apple-a13): a single ldeor instruction.
define i32 @atomicrmw_xor_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB52_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    eor w9, w8, w1
; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB52_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov w0, w1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldeor4_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB52_1
; CHECK-NOLSE-O0-NEXT:  LBB52_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB52_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    eor w12, w8, w9
; CHECK-NOLSE-O0-NEXT:  LBB52_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB52_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB52_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB52_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB52_2
; CHECK-NOLSE-O0-NEXT:  LBB52_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB52_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB52_1
; CHECK-NOLSE-O0-NEXT:    b LBB52_5
; CHECK-NOLSE-O0-NEXT:  LBB52_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldeor4_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldeor w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xor_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldeor w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xor ptr %ptr, i32 %rhs monotonic
  ret i32 %res
}

; Test: i32 signed `atomicrmw min` with acquire ordering; the old value is
; returned. The assertions below are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldaxr / cmp / csel(lt) / stxr retry loop.
;   - outline atomics, default opt level: the same inline loop; no outlined
;     helper call is expected for min.
;   - no LSE, -O0: the minimum is selected with subs / csel(le), then a
;     compare-and-swap style nested loop (ldaxr / cmp / stlxr) retries.
;   - outline atomics, -O0: a retry loop around a call to ___aarch64_cas4_acq.
;   - LSE (apple-a13): a single ldsmina instruction.
define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB53_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB53_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB53_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
; CHECK-OUTLINE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB53_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB53_1
; CHECK-NOLSE-O0-NEXT:  LBB53_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB53_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, le
; CHECK-NOLSE-O0-NEXT:  LBB53_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB53_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB53_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB53_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB53_2
; CHECK-NOLSE-O0-NEXT:  LBB53_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB53_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB53_1
; CHECK-NOLSE-O0-NEXT:    b LBB53_5
; CHECK-NOLSE-O0-NEXT:  LBB53_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB53_1
; CHECK-OUTLINE-O0-NEXT:  LBB53_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, le
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB53_1
; CHECK-OUTLINE-O0-NEXT:    b LBB53_2
; CHECK-OUTLINE-O0-NEXT:  LBB53_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmina w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_min_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmina w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw min ptr %ptr, i32 %rhs acquire
  ret i32 %res
}

; Test: i32 signed `atomicrmw max` with release ordering; the old value is
; returned. The assertions below are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldxr / cmp / csel(gt) / stlxr retry loop.
;   - outline atomics, default opt level: the same inline loop; no outlined
;     helper call is expected for max.
;   - no LSE, -O0: the maximum is selected with subs / csel(gt), then a
;     compare-and-swap style nested loop (ldaxr / cmp / stlxr) retries.
;   - outline atomics, -O0: a retry loop around a call to ___aarch64_cas4_rel.
;   - LSE (apple-a13): a single ldsmaxl instruction.
define i32 @atomicrmw_max_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB54_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB54_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB54_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxr w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, gt
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB54_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB54_1
; CHECK-NOLSE-O0-NEXT:  LBB54_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB54_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, gt
; CHECK-NOLSE-O0-NEXT:  LBB54_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB54_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB54_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB54_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB54_2
; CHECK-NOLSE-O0-NEXT:  LBB54_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB54_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB54_1
; CHECK-NOLSE-O0-NEXT:    b LBB54_5
; CHECK-NOLSE-O0-NEXT:  LBB54_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB54_1
; CHECK-OUTLINE-O0-NEXT:  LBB54_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, gt
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB54_1
; CHECK-OUTLINE-O0-NEXT:    b LBB54_2
; CHECK-OUTLINE-O0-NEXT:  LBB54_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmaxl w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_max_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmaxl w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw max ptr %ptr, i32 %rhs release
  ret i32 %res
}

; Test: i32 unsigned `atomicrmw umin` with seq_cst ordering; the old value is
; returned. The assertions below are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldaxr / cmp / csel(lo) / stlxr retry loop.
;   - outline atomics, default opt level: the same inline loop; no outlined
;     helper call is expected for umin.
;   - no LSE, -O0: the minimum is selected with subs / csel(ls), then a
;     compare-and-swap style nested loop (ldaxr / cmp / stlxr) retries.
;   - outline atomics, -O0: a retry loop around a call to
;     ___aarch64_cas4_acq_rel.
;   - LSE (apple-a13): a single lduminal instruction.
define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB55_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lo
; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB55_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB55_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxr w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lo
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB55_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB55_1
; CHECK-NOLSE-O0-NEXT:  LBB55_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB55_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, ls
; CHECK-NOLSE-O0-NEXT:  LBB55_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB55_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB55_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB55_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB55_2
; CHECK-NOLSE-O0-NEXT:  LBB55_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB55_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB55_1
; CHECK-NOLSE-O0-NEXT:    b LBB55_5
; CHECK-NOLSE-O0-NEXT:  LBB55_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB55_1
; CHECK-OUTLINE-O0-NEXT:  LBB55_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, ls
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB55_1
; CHECK-OUTLINE-O0-NEXT:    b LBB55_2
; CHECK-OUTLINE-O0-NEXT:  LBB55_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    lduminal w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umin_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    lduminal w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umin ptr %ptr, i32 %rhs seq_cst
  ret i32 %res
}

; Test: i32 unsigned `atomicrmw umax` with monotonic ordering; the old value is
; returned. The assertions below are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldxr / cmp / csel(hi) / stxr retry loop.
;   - outline atomics, default opt level: the same inline loop; no outlined
;     helper call is expected for umax.
;   - no LSE, -O0: the maximum is selected with subs / csel(hi), then a
;     compare-and-swap style nested loop (ldaxr / cmp / stlxr) retries.
;   - outline atomics, -O0: a retry loop around a call to
;     ___aarch64_cas4_relax.
;   - LSE (apple-a13): a single ldumax instruction.
define i32 @atomicrmw_umax_i32(ptr %ptr, i32 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB56_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, hi
; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB56_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov w0, w8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB56_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxr w8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, hi
; CHECK-OUTLINE-O1-NEXT:    stxr w10, w9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB56_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov w0, w8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr w8, [x0]
; CHECK-NOLSE-O0-NEXT:    str w8, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB56_1
; CHECK-NOLSE-O0-NEXT:  LBB56_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB56_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr w8, [sp, #28] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr w9, [sp, #24] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs w10, w8, w9
; CHECK-NOLSE-O0-NEXT:    csel w12, w8, w9, hi
; CHECK-NOLSE-O0-NEXT:  LBB56_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB56_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr w9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp w9, w8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB56_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB56_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, w12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB56_2
; CHECK-NOLSE-O0-NEXT:  LBB56_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB56_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs w8, w9, w8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #12] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str w9, [sp, #28] ; 4-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB56_1
; CHECK-NOLSE-O0-NEXT:    b LBB56_5
; CHECK-NOLSE-O0-NEXT:  LBB56_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w1, [sp, #24] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB56_1
; CHECK-OUTLINE-O0-NEXT:  LBB56_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #28] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #24] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #8] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs w9, w0, w8
; CHECK-OUTLINE-O0-NEXT:    csel w1, w0, w8, hi
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w8, [sp, #8] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #28] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB56_1
; CHECK-OUTLINE-O0-NEXT:    b LBB56_2
; CHECK-OUTLINE-O0-NEXT:  LBB56_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr w0, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #48
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldumax w1, w0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umax_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldumax w1, w0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umax ptr %ptr, i32 %rhs monotonic
  ret i32 %res
}

; Test: i64 `atomicrmw add` with seq_cst ordering; the old value is returned.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldaxr / add / stlxr retry loop on x
;     registers.
;   - no LSE, -O0: the sum is computed up front, then a compare-and-swap style
;     nested loop (ldaxr / cmp / stlxr) retries until the store succeeds.
;   - outline atomics: a call to ___aarch64_ldadd8_acq_rel (value in x0,
;     pointer in x1).
;   - LSE (apple-a13): a single ldaddal instruction.
define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_add_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB57_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    add x9, x8, x1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB57_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_add_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd8_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_add_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB57_1
; CHECK-NOLSE-O0-NEXT:  LBB57_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB57_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add x12, x8, x9
; CHECK-NOLSE-O0-NEXT:  LBB57_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB57_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB57_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB57_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB57_2
; CHECK-NOLSE-O0-NEXT:  LBB57_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB57_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB57_1
; CHECK-NOLSE-O0-NEXT:    b LBB57_5
; CHECK-NOLSE-O0-NEXT:  LBB57_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_add_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_add_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldaddal x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_add_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldaddal x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
  ret i64 %res
}

; Test: i64 `atomicrmw xchg` with monotonic ordering; the old value is
; returned. The assertions below are autogenerated (see the NOTE at the top of
; the file); regenerate them with utils/update_llc_test_checks.py instead of
; hand-editing.
; Expected lowering per configuration, as spelled out by the assertions:
;   - no LSE, default opt level: an ldxr / stxr retry loop on x registers.
;   - no LSE, -O0: a compare-and-swap style nested loop (ldaxr / cmp / stlxr)
;     that retries until the store succeeds.
;   - outline atomics: a call to ___aarch64_swp8_relax (value in x0, pointer
;     in x1).
;   - LSE (apple-a13): a single swp instruction.
define i64 @atomicrmw_xchg_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB58_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    stxr w9, x1, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB58_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_swp8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xchg_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB58_1
; CHECK-NOLSE-O0-NEXT:  LBB58_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB58_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x12, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:  LBB58_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB58_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB58_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB58_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB58_2
; CHECK-NOLSE-O0-NEXT:  LBB58_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB58_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB58_1
; CHECK-NOLSE-O0-NEXT:    b LBB58_5
; CHECK-NOLSE-O0-NEXT:  LBB58_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xchg_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_swp8_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xchg_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    swp x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xchg_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    swp x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xchg ptr %ptr, i64 %rhs monotonic
  ret i64 %res
}

; 64-bit atomicrmw sub with acquire ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldaxr / sub / stxr retry loop.
;   - no LSE, -O0: plain initial load, then an outer loop that recomputes the
;     difference around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics: %rhs is negated and handed to ___aarch64_ldadd8_acq.
;   - LSE (both opt levels): neg followed by ldadda.
define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_sub_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB59_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    sub x9, x8, x1
; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB59_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_sub_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    neg x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldadd8_acq
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_sub_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB59_1
; CHECK-NOLSE-O0-NEXT:  LBB59_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB59_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs x12, x8, x9
; CHECK-NOLSE-O0-NEXT:  LBB59_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB59_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB59_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB59_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB59_2
; CHECK-NOLSE-O0-NEXT:  LBB59_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB59_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB59_1
; CHECK-NOLSE-O0-NEXT:    b LBB59_5
; CHECK-NOLSE-O0-NEXT:  LBB59_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_sub_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x9, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov x8, xzr
; CHECK-OUTLINE-O0-NEXT:    subs x0, x8, x9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldadd8_acq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_sub_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    neg x8, x1
; CHECK-LSE-O1-NEXT:    ldadda x8, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_sub_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    neg x8, x1
; CHECK-LSE-O0-NEXT:    ldadda x8, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw sub ptr %ptr, i64 %rhs acquire
  ret i64 %res
}

; 64-bit atomicrmw and with release ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldxr / and / stlxr retry loop.
;   - no LSE, -O0: plain initial load, then an outer loop that recomputes the
;     AND around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics: %rhs is bitwise-inverted (mvn, or eor with all-ones at
;     -O0) and handed to ___aarch64_ldclr8_rel.
;   - LSE (both opt levels): mvn followed by ldclrl.
define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_and_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB60_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    and x9, x8, x1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB60_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_and_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mvn x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldclr8_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_and_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB60_1
; CHECK-NOLSE-O0-NEXT:  LBB60_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB60_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    and x12, x8, x9
; CHECK-NOLSE-O0-NEXT:  LBB60_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB60_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB60_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB60_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB60_2
; CHECK-NOLSE-O0-NEXT:  LBB60_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB60_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB60_1
; CHECK-NOLSE-O0-NEXT:    b LBB60_5
; CHECK-NOLSE-O0-NEXT:  LBB60_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_and_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x9, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    mov x8, #-1 ; =0xffffffffffffffff
; CHECK-OUTLINE-O0-NEXT:    eor x0, x8, x9
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldclr8_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_and_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mvn x8, x1
; CHECK-LSE-O1-NEXT:    ldclrl x8, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_and_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mvn x8, x1
; CHECK-LSE-O0-NEXT:    ldclrl x8, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw and ptr %ptr, i64 %rhs release
  ret i64 %res
}

; 64-bit atomicrmw or with seq_cst ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldaxr / orr / stlxr retry loop.
;   - no LSE, -O0: plain initial load, then an outer loop that recomputes the
;     OR around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics: %rhs is passed unmodified to ___aarch64_ldset8_acq_rel.
;   - LSE (both opt levels): a single ldsetal.
define i64 @atomicrmw_or_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_or_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB61_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    orr x9, x8, x1
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB61_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_or_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldset8_acq_rel
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_or_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB61_1
; CHECK-NOLSE-O0-NEXT:  LBB61_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB61_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    orr x12, x8, x9
; CHECK-NOLSE-O0-NEXT:  LBB61_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB61_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB61_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB61_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB61_2
; CHECK-NOLSE-O0-NEXT:  LBB61_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB61_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB61_1
; CHECK-NOLSE-O0-NEXT:    b LBB61_5
; CHECK-NOLSE-O0-NEXT:  LBB61_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_or_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldset8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_or_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsetal x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_or_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsetal x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw or ptr %ptr, i64 %rhs seq_cst
  ret i64 %res
}

; 64-bit atomicrmw xor with monotonic ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldxr / eor / stxr retry loop.
;   - no LSE, -O0: plain initial load, then an outer loop that recomputes the
;     XOR around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics: %rhs is passed unmodified to ___aarch64_ldeor8_relax.
;   - LSE (both opt levels): a single ldeor.
define i64 @atomicrmw_xor_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_xor_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB62_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    eor x9, x8, x1
; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB62_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_xor_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    mov x2, x0
; CHECK-OUTLINE-O1-NEXT:    mov x0, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_ldeor8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_xor_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB62_1
; CHECK-NOLSE-O0-NEXT:  LBB62_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB62_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    eor x12, x8, x9
; CHECK-NOLSE-O0-NEXT:  LBB62_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB62_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB62_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB62_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB62_2
; CHECK-NOLSE-O0-NEXT:  LBB62_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB62_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB62_1
; CHECK-NOLSE-O0-NEXT:    b LBB62_5
; CHECK-NOLSE-O0-NEXT:  LBB62_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_xor_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_ldeor8_relax
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_xor_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldeor x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_xor_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldeor x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw xor ptr %ptr, i64 %rhs monotonic
  ret i64 %res
}

; 64-bit signed atomicrmw min with acquire ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldaxr / cmp / csel(lt) / stxr retry loop; the
;     outline-atomics run at the same opt level expects the identical loop
;     rather than a helper call.
;   - no LSE, -O0: plain initial load, then an outer loop that selects the
;     minimum with subs / csel(le) around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics, -O0: the selected value is stored through a loop that
;     calls ___aarch64_cas8_acq until the returned value matches the expected
;     one.
;   - LSE (both opt levels): a single ldsmina.
define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_min_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB63_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, lt
; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB63_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_min_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB63_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, lt
; CHECK-OUTLINE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB63_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov x0, x8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_min_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB63_1
; CHECK-NOLSE-O0-NEXT:  LBB63_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB63_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, le
; CHECK-NOLSE-O0-NEXT:  LBB63_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB63_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB63_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB63_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB63_2
; CHECK-NOLSE-O0-NEXT:  LBB63_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB63_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB63_1
; CHECK-NOLSE-O0-NEXT:    b LBB63_5
; CHECK-NOLSE-O0-NEXT:  LBB63_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_min_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x1, [sp, #32] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB63_1
; CHECK-OUTLINE-O0-NEXT:  LBB63_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, le
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB63_1
; CHECK-OUTLINE-O0-NEXT:    b LBB63_2
; CHECK-OUTLINE-O0-NEXT:  LBB63_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_min_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmina x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_min_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmina x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw min ptr %ptr, i64 %rhs acquire
  ret i64 %res
}

; 64-bit signed atomicrmw max with release ordering; the function returns the
; atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldxr / cmp / csel(gt) / stlxr retry loop; the
;     outline-atomics run at the same opt level expects the identical loop
;     rather than a helper call.
;   - no LSE, -O0: plain initial load, then an outer loop that selects the
;     maximum with subs / csel(gt) around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics, -O0: the selected value is stored through a loop that
;     calls ___aarch64_cas8_rel until the returned value matches the expected
;     one.
;   - LSE (both opt levels): a single ldsmaxl.
define i64 @atomicrmw_max_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_max_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB64_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, gt
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB64_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_max_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB64_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxr x8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, gt
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB64_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov x0, x8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_max_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB64_1
; CHECK-NOLSE-O0-NEXT:  LBB64_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB64_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, gt
; CHECK-NOLSE-O0-NEXT:  LBB64_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB64_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB64_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB64_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB64_2
; CHECK-NOLSE-O0-NEXT:  LBB64_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB64_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB64_1
; CHECK-NOLSE-O0-NEXT:    b LBB64_5
; CHECK-NOLSE-O0-NEXT:  LBB64_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_max_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x1, [sp, #32] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB64_1
; CHECK-OUTLINE-O0-NEXT:  LBB64_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, gt
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_rel
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB64_1
; CHECK-OUTLINE-O0-NEXT:    b LBB64_2
; CHECK-OUTLINE-O0-NEXT:  LBB64_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_max_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldsmaxl x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_max_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldsmaxl x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw max ptr %ptr, i64 %rhs release
  ret i64 %res
}

; 64-bit unsigned atomicrmw umin with seq_cst ordering; the function returns
; the atomicrmw result.
; Lowerings recorded by the autogenerated assertions below:
;   - no LSE, default opt level: ldaxr / cmp / csel(lo) / stlxr retry loop;
;     the outline-atomics run at the same opt level expects the identical loop
;     rather than a helper call.
;   - no LSE, -O0: plain initial load, then an outer loop that selects the
;     minimum with subs / csel(ls) around an inner ldaxr / cmp / stlxr loop.
;   - outline atomics, -O0: the selected value is stored through a loop that
;     calls ___aarch64_cas8_acq_rel until the returned value matches the
;     expected one.
;   - LSE (both opt levels): a single lduminal.
define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umin_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB65_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, lo
; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB65_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umin_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB65_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldaxr x8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, lo
; CHECK-OUTLINE-O1-NEXT:    stlxr w10, x9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB65_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov x0, x8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umin_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB65_1
; CHECK-NOLSE-O0-NEXT:  LBB65_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB65_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, ls
; CHECK-NOLSE-O0-NEXT:  LBB65_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB65_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB65_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB65_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB65_2
; CHECK-NOLSE-O0-NEXT:  LBB65_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB65_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB65_1
; CHECK-NOLSE-O0-NEXT:    b LBB65_5
; CHECK-NOLSE-O0-NEXT:  LBB65_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umin_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x1, [sp, #32] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB65_1
; CHECK-OUTLINE-O0-NEXT:  LBB65_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, ls
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_acq_rel
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB65_1
; CHECK-OUTLINE-O0-NEXT:    b LBB65_2
; CHECK-OUTLINE-O0-NEXT:  LBB65_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umin_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    lduminal x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umin_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    lduminal x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umin ptr %ptr, i64 %rhs seq_cst
  ret i64 %res
}

; Unsigned-max atomicrmw on an i64 with monotonic ordering; the function
; returns the atomicrmw result. Per the assertions below, the optimized
; non-LSE runs (with or without outline atomics) expect an inline ldxr/stxr
; loop, the -O0 outline-atomics run expects a retry loop around
; ___aarch64_cas8_relax, and the LSE runs expect a single ldumax.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define i64 @atomicrmw_umax_i64(ptr %ptr, i64 %rhs) {
; CHECK-NOLSE-O1-LABEL: atomicrmw_umax_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:  LBB66_1: ; %atomicrmw.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x8, [x0]
; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, hi
; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB66_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-NOLSE-O1-NEXT:    mov x0, x8
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: atomicrmw_umax_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:  LBB66_1: ; %atomicrmw.start
; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O1-NEXT:    ldxr x8, [x0]
; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, hi
; CHECK-OUTLINE-O1-NEXT:    stxr w10, x9, [x0]
; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB66_1
; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
; CHECK-OUTLINE-O1-NEXT:    mov x0, x8
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: atomicrmw_umax_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    sub sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NOLSE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x1, [sp, #16] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    ldr x8, [x0]
; CHECK-NOLSE-O0-NEXT:    str x8, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    b LBB66_1
; CHECK-NOLSE-O0-NEXT:  LBB66_1: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ; Child Loop BB66_2 Depth 2
; CHECK-NOLSE-O0-NEXT:    ldr x8, [sp, #24] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x11, [sp, #8] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    ldr x9, [sp, #16] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    subs x10, x8, x9
; CHECK-NOLSE-O0-NEXT:    csel x12, x8, x9, hi
; CHECK-NOLSE-O0-NEXT:  LBB66_2: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; Parent Loop BB66_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NOLSE-O0-NEXT:    ldaxr x9, [x11]
; CHECK-NOLSE-O0-NEXT:    cmp x9, x8
; CHECK-NOLSE-O0-NEXT:    b.ne LBB66_4
; CHECK-NOLSE-O0-NEXT:  ; %bb.3: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB66_2 Depth=2
; CHECK-NOLSE-O0-NEXT:    stlxr w10, x12, [x11]
; CHECK-NOLSE-O0-NEXT:    cbnz w10, LBB66_2
; CHECK-NOLSE-O0-NEXT:  LBB66_4: ; %atomicrmw.start
; CHECK-NOLSE-O0-NEXT:    ; in Loop: Header=BB66_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    subs x8, x9, x8
; CHECK-NOLSE-O0-NEXT:    cset w8, eq
; CHECK-NOLSE-O0-NEXT:    str x9, [sp] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    str x9, [sp, #24] ; 8-byte Folded Spill
; CHECK-NOLSE-O0-NEXT:    tbz w8, #0, LBB66_1
; CHECK-NOLSE-O0-NEXT:    b LBB66_5
; CHECK-NOLSE-O0-NEXT:  LBB66_5: ; %atomicrmw.end
; CHECK-NOLSE-O0-NEXT:    ldr x0, [sp] ; 8-byte Folded Reload
; CHECK-NOLSE-O0-NEXT:    add sp, sp, #32
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: atomicrmw_umax_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #24] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x1, [sp, #32] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [x0]
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    b LBB66_1
; CHECK-OUTLINE-O0-NEXT:  LBB66_1: ; %atomicrmw.start
; CHECK-OUTLINE-O0-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp, #24] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #32] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    subs x9, x0, x8
; CHECK-OUTLINE-O0-NEXT:    csel x1, x0, x8, hi
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O0-NEXT:    ldr x8, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x8
; CHECK-OUTLINE-O0-NEXT:    cset w8, eq
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #16] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #40] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    tbz w8, #0, LBB66_1
; CHECK-OUTLINE-O0-NEXT:    b LBB66_2
; CHECK-OUTLINE-O0-NEXT:  LBB66_2: ; %atomicrmw.end
; CHECK-OUTLINE-O0-NEXT:    ldr x0, [sp, #16] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #64
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: atomicrmw_umax_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldumax x1, x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: atomicrmw_umax_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldumax x1, x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %res = atomicrmw umax ptr %ptr, i64 %rhs monotonic
  ret i64 %res
}

; cmpxchg on an i8 with monotonic success and failure orderings; the whole
; { i8, i1 } result of the instruction is returned. Per the assertions below,
; the optimized non-LSE run expects an ldxrb/stxrb loop, both outline-atomics
; non-LSE runs expect a call to ___aarch64_cas1_relax, and the LSE runs
; expect casb. The i1 flag is expected to be rebuilt with a byte-extended
; compare (uxtb) wherever it is not a constant.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT:  LBB67_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrb w0, [x8]
; CHECK-NOLSE-O1-NEXT:    and w9, w0, #0xff
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxtb
; CHECK-NOLSE-O1-NEXT:    b.ne LBB67_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB67_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxrb w9, w2, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB67_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB67_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_i8:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w19, w1
; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
; CHECK-OUTLINE-O1-NEXT:    mov w0, w19
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas1_relax
; CHECK-OUTLINE-O1-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w19, uxtb
; CHECK-OUTLINE-O1-NEXT:    cset w1, eq
; CHECK-OUTLINE-O1-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i8:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB67_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxrb w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1, uxtb
; CHECK-NOLSE-O0-NEXT:    b.ne LBB67_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB67_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxrb w8, w2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB67_1
; CHECK-NOLSE-O0-NEXT:  LBB67_3:
; CHECK-NOLSE-O0-NEXT:    and w8, w0, #0xff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w1, uxtb
; CHECK-NOLSE-O0-NEXT:    cset w1, eq
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: cmpxchg_i8:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w1, w2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas1_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w1, uxtb
; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i8:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x1
; CHECK-LSE-O1-NEXT:    casb w8, w2, [x0]
; CHECK-LSE-O1-NEXT:    and w9, w8, #0xff
; CHECK-LSE-O1-NEXT:    cmp w9, w1, uxtb
; CHECK-LSE-O1-NEXT:    cset w1, eq
; CHECK-LSE-O1-NEXT:    mov x0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: cmpxchg_i8:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    casb w0, w2, [x8]
; CHECK-LSE-O0-NEXT:    and w8, w0, #0xff
; CHECK-LSE-O0-NEXT:    subs w8, w8, w1, uxtb
; CHECK-LSE-O0-NEXT:    cset w1, eq
; CHECK-LSE-O0-NEXT:    ret
  %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic
  ret { i8, i1 } %res
}

; cmpxchg on an i16 with monotonic success and failure orderings; the whole
; { i16, i1 } result of the instruction is returned. Per the assertions below,
; the optimized non-LSE run expects an ldxrh/stxrh loop, both outline-atomics
; non-LSE runs expect a call to ___aarch64_cas2_relax, and the LSE runs
; expect cash. The i1 flag is expected to be rebuilt with a halfword-extended
; compare (uxth) wherever it is not a constant.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
; CHECK-NOLSE-O1-NEXT:  LBB68_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxrh w0, [x8]
; CHECK-NOLSE-O1-NEXT:    and w9, w0, #0xffff
; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, uxth
; CHECK-NOLSE-O1-NEXT:    b.ne LBB68_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB68_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxrh w9, w2, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB68_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB68_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_i16:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w19, w1
; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
; CHECK-OUTLINE-O1-NEXT:    mov w0, w19
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas2_relax
; CHECK-OUTLINE-O1-NEXT:    and w8, w0, #0xffff
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    cmp w8, w19, uxth
; CHECK-OUTLINE-O1-NEXT:    cset w1, eq
; CHECK-OUTLINE-O1-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i16:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB68_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxrh w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1, uxth
; CHECK-NOLSE-O0-NEXT:    b.ne LBB68_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB68_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxrh w8, w2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB68_1
; CHECK-NOLSE-O0-NEXT:  LBB68_3:
; CHECK-NOLSE-O0-NEXT:    and w8, w0, #0xffff
; CHECK-NOLSE-O0-NEXT:    subs w8, w8, w1, uxth
; CHECK-NOLSE-O0-NEXT:    cset w1, eq
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: cmpxchg_i16:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w1, w2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas2_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    and w8, w0, #0xffff
; CHECK-OUTLINE-O0-NEXT:    subs w8, w8, w1, uxth
; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i16:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x1
; CHECK-LSE-O1-NEXT:    cash w8, w2, [x0]
; CHECK-LSE-O1-NEXT:    and w9, w8, #0xffff
; CHECK-LSE-O1-NEXT:    cmp w9, w1, uxth
; CHECK-LSE-O1-NEXT:    cset w1, eq
; CHECK-LSE-O1-NEXT:    mov x0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: cmpxchg_i16:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    cash w0, w2, [x8]
; CHECK-LSE-O0-NEXT:    and w8, w0, #0xffff
; CHECK-LSE-O0-NEXT:    subs w8, w8, w1, uxth
; CHECK-LSE-O0-NEXT:    cset w1, eq
; CHECK-LSE-O0-NEXT:    ret
  %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic
  ret { i16, i1 } %res
}

; cmpxchg on an i32 with monotonic success and failure orderings; the whole
; { i32, i1 } result of the instruction is returned. Per the assertions below,
; the optimized non-LSE run expects an ldxr/stxr loop on w registers, both
; outline-atomics non-LSE runs expect a call to ___aarch64_cas4_relax, and
; the LSE runs expect a 32-bit cas. No masking of the loaded value is
; expected before the compare, unlike the i8/i16 variants.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i32:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:  LBB69_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr w0, [x8]
; CHECK-NOLSE-O1-NEXT:    cmp w0, w1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB69_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB69_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w9, w2, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB69_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB69_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_i32:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov w19, w1
; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
; CHECK-OUTLINE-O1-NEXT:    mov w0, w19
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas4_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    cmp w0, w19
; CHECK-OUTLINE-O1-NEXT:    cset w1, eq
; CHECK-OUTLINE-O1-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i32:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB69_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr w0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp w0, w1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB69_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB69_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, w2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB69_1
; CHECK-NOLSE-O0-NEXT:  LBB69_3:
; CHECK-NOLSE-O0-NEXT:    subs w8, w0, w1
; CHECK-NOLSE-O0-NEXT:    cset w1, eq
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: cmpxchg_i32:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w0, w1
; CHECK-OUTLINE-O0-NEXT:    str w0, [sp, #12] ; 4-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov w1, w2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas4_relax
; CHECK-OUTLINE-O0-NEXT:    ldr w1, [sp, #12] ; 4-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs w8, w0, w1
; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i32:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x1
; CHECK-LSE-O1-NEXT:    cas w8, w2, [x0]
; CHECK-LSE-O1-NEXT:    cmp w8, w1
; CHECK-LSE-O1-NEXT:    cset w1, eq
; CHECK-LSE-O1-NEXT:    mov x0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: cmpxchg_i32:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    cas w0, w2, [x8]
; CHECK-LSE-O0-NEXT:    subs w8, w0, w1
; CHECK-LSE-O0-NEXT:    cset w1, eq
; CHECK-LSE-O0-NEXT:    ret
  %res = cmpxchg ptr %ptr, i32 %desired, i32 %new monotonic monotonic
  ret { i32, i1 } %res
}

; cmpxchg on an i64 with monotonic success and failure orderings; the whole
; { i64, i1 } result of the instruction is returned. Per the assertions below,
; the optimized non-LSE run expects an ldxr/stxr loop on x registers, both
; outline-atomics non-LSE runs expect a call to ___aarch64_cas8_relax, and
; the LSE runs expect a 64-bit cas.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i64:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:  LBB70_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x0, [x8]
; CHECK-NOLSE-O1-NEXT:    cmp x0, x1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB70_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB70_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB70_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB70_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_i64:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov x19, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    mov x0, x19
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    cmp x0, x19
; CHECK-OUTLINE-O1-NEXT:    cset w1, eq
; CHECK-OUTLINE-O1-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: cmpxchg_i64:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB70_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB70_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB70_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB70_1
; CHECK-NOLSE-O0-NEXT:  LBB70_3:
; CHECK-NOLSE-O0-NEXT:    subs x8, x0, x1
; CHECK-NOLSE-O0-NEXT:    cset w1, eq
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: cmpxchg_i64:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x1, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x1
; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_i64:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x1
; CHECK-LSE-O1-NEXT:    cas x8, x2, [x0]
; CHECK-LSE-O1-NEXT:    cmp x8, x1
; CHECK-LSE-O1-NEXT:    cset w1, eq
; CHECK-LSE-O1-NEXT:    mov x0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: cmpxchg_i64:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
; CHECK-LSE-O0-NEXT:    subs x8, x0, x1
; CHECK-LSE-O0-NEXT:    cset w1, eq
; CHECK-LSE-O0-NEXT:    ret
  %res = cmpxchg ptr %ptr, i64 %desired, i64 %new monotonic monotonic
  ret { i64, i1 } %res
}

; cmpxchg on a pointer-typed value with monotonic success and failure
; orderings; the whole { ptr, i1 } result of the instruction is returned.
; The expected instruction sequences below match those of the i64 variant
; (ldxr/stxr loop, ___aarch64_cas8_relax call, or 64-bit cas), differing
; only in the local label numbers.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_ptr:
; CHECK-NOLSE-O1:       ; %bb.0:
; CHECK-NOLSE-O1-NEXT:    mov x8, x0
; CHECK-NOLSE-O1-NEXT:  LBB71_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT:    ldxr x0, [x8]
; CHECK-NOLSE-O1-NEXT:    cmp x0, x1
; CHECK-NOLSE-O1-NEXT:    b.ne LBB71_4
; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB71_1 Depth=1
; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x8]
; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB71_1
; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT:    ret
; CHECK-NOLSE-O1-NEXT:  LBB71_4: ; %cmpxchg.nostore
; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
; CHECK-NOLSE-O1-NEXT:    clrex
; CHECK-NOLSE-O1-NEXT:    ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_ptr:
; CHECK-OUTLINE-O1:       ; %bb.0:
; CHECK-OUTLINE-O1-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O1-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
; CHECK-OUTLINE-O1-NEXT:    mov x19, x1
; CHECK-OUTLINE-O1-NEXT:    mov x1, x2
; CHECK-OUTLINE-O1-NEXT:    mov x0, x19
; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
; CHECK-OUTLINE-O1-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O1-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    cmp x0, x19
; CHECK-OUTLINE-O1-NEXT:    cset w1, eq
; CHECK-OUTLINE-O1-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
; CHECK-OUTLINE-O1-NEXT:    ret
;
; CHECK-NOLSE-O0-LABEL: cmpxchg_ptr:
; CHECK-NOLSE-O0:       ; %bb.0:
; CHECK-NOLSE-O0-NEXT:    mov x9, x0
; CHECK-NOLSE-O0-NEXT:  LBB71_1: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O0-NEXT:    ldaxr x0, [x9]
; CHECK-NOLSE-O0-NEXT:    cmp x0, x1
; CHECK-NOLSE-O0-NEXT:    b.ne LBB71_3
; CHECK-NOLSE-O0-NEXT:  ; %bb.2: ; in Loop: Header=BB71_1 Depth=1
; CHECK-NOLSE-O0-NEXT:    stlxr w8, x2, [x9]
; CHECK-NOLSE-O0-NEXT:    cbnz w8, LBB71_1
; CHECK-NOLSE-O0-NEXT:  LBB71_3:
; CHECK-NOLSE-O0-NEXT:    subs x8, x0, x1
; CHECK-NOLSE-O0-NEXT:    cset w1, eq
; CHECK-NOLSE-O0-NEXT:    ret
;
; CHECK-OUTLINE-O0-LABEL: cmpxchg_ptr:
; CHECK-OUTLINE-O0:       ; %bb.0:
; CHECK-OUTLINE-O0-NEXT:    sub sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    .cfi_def_cfa_offset 32
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w30, -8
; CHECK-OUTLINE-O0-NEXT:    .cfi_offset w29, -16
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x0, x1
; CHECK-OUTLINE-O0-NEXT:    str x0, [sp, #8] ; 8-byte Folded Spill
; CHECK-OUTLINE-O0-NEXT:    mov x1, x2
; CHECK-OUTLINE-O0-NEXT:    ldr x2, [sp] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    bl ___aarch64_cas8_relax
; CHECK-OUTLINE-O0-NEXT:    ldr x1, [sp, #8] ; 8-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    subs x8, x0, x1
; CHECK-OUTLINE-O0-NEXT:    cset w1, eq
; CHECK-OUTLINE-O0-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
; CHECK-OUTLINE-O0-NEXT:    add sp, sp, #32
; CHECK-OUTLINE-O0-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: cmpxchg_ptr:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    mov x8, x1
; CHECK-LSE-O1-NEXT:    cas x8, x2, [x0]
; CHECK-LSE-O1-NEXT:    cmp x8, x1
; CHECK-LSE-O1-NEXT:    cset w1, eq
; CHECK-LSE-O1-NEXT:    mov x0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: cmpxchg_ptr:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    mov x8, x0
; CHECK-LSE-O0-NEXT:    mov x0, x1
; CHECK-LSE-O0-NEXT:    cas x0, x2, [x8]
; CHECK-LSE-O0-NEXT:    subs x8, x0, x1
; CHECK-LSE-O0-NEXT:    cset w1, eq
; CHECK-LSE-O0-NEXT:    ret
  %res = cmpxchg ptr %ptr, ptr %desired, ptr %new monotonic monotonic
  ret { ptr, i1 } %res
}

; seq_cst atomic i64 load whose result is bitcast to double and returned.
; Every configuration below expects the same two instructions: an ldar into
; a general-purpose register followed by an fmov into d0.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define internal double @bitcast_to_double(ptr %ptr) {
; CHECK-NOLSE-LABEL: bitcast_to_double:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar x8, [x0]
; CHECK-NOLSE-NEXT:    fmov d0, x8
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: bitcast_to_double:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar x8, [x0]
; CHECK-OUTLINE-NEXT:    fmov d0, x8
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: bitcast_to_double:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar x8, [x0]
; CHECK-LSE-O1-NEXT:    fmov d0, x8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: bitcast_to_double:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar x8, [x0]
; CHECK-LSE-O0-NEXT:    fmov d0, x8
; CHECK-LSE-O0-NEXT:    ret
  %load = load atomic i64, ptr %ptr seq_cst, align 8
  %bitcast = bitcast i64 %load to double
  ret double %bitcast
}

; seq_cst atomic i32 load (declared with align 8) whose result is bitcast to
; float and returned. Every configuration below expects a 32-bit ldar into a
; general-purpose register followed by an fmov into s0.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define internal float @bitcast_to_float(ptr %ptr) {
; CHECK-NOLSE-LABEL: bitcast_to_float:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar w8, [x0]
; CHECK-NOLSE-NEXT:    fmov s0, w8
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: bitcast_to_float:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar w8, [x0]
; CHECK-OUTLINE-NEXT:    fmov s0, w8
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: bitcast_to_float:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar w8, [x0]
; CHECK-LSE-O1-NEXT:    fmov s0, w8
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: bitcast_to_float:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar w8, [x0]
; CHECK-LSE-O0-NEXT:    fmov s0, w8
; CHECK-LSE-O0-NEXT:    ret
  %load = load atomic i32, ptr %ptr seq_cst, align 8
  %bitcast = bitcast i32 %load to float
  ret float %bitcast
}

; seq_cst atomic i16 load (declared with align 8) whose result is bitcast to
; half and returned. Every configuration below expects an ldarh into a
; general-purpose register followed by an fmov into s0, with the h0
; sub-register then marked as the live result.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define internal half @bitcast_to_half(ptr %ptr) {
; CHECK-NOLSE-LABEL: bitcast_to_half:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldarh w8, [x0]
; CHECK-NOLSE-NEXT:    fmov s0, w8
; CHECK-NOLSE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: bitcast_to_half:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldarh w8, [x0]
; CHECK-OUTLINE-NEXT:    fmov s0, w8
; CHECK-OUTLINE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: bitcast_to_half:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldarh w8, [x0]
; CHECK-LSE-O1-NEXT:    fmov s0, w8
; CHECK-LSE-O1-NEXT:    ; kill: def $h0 killed $h0 killed $s0
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: bitcast_to_half:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldarh w8, [x0]
; CHECK-LSE-O0-NEXT:    fmov s0, w8
; CHECK-LSE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
; CHECK-LSE-O0-NEXT:    ret
  %load = load atomic i16, ptr %ptr seq_cst, align 8
  %bitcast = bitcast i16 %load to half
  ret half %bitcast
}

; seq_cst atomic i64 load whose result is converted to a pointer with
; inttoptr and returned. Every configuration below expects a single ldar
; directly into x0, i.e. no extra instruction for the conversion.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define internal ptr @inttoptr(ptr %ptr) {
; CHECK-NOLSE-LABEL: inttoptr:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar x0, [x0]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: inttoptr:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar x0, [x0]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: inttoptr:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: inttoptr:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %load = load atomic i64, ptr %ptr seq_cst, align 8
  %bitcast = inttoptr i64 %load to ptr
  ret ptr %bitcast
}

; seq_cst atomic load of a pointer-typed value, returned unchanged. Every
; configuration below expects a single ldar directly into x0.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing by hand.
define internal ptr @load_ptr(ptr %ptr) {
; CHECK-NOLSE-LABEL: load_ptr:
; CHECK-NOLSE:       ; %bb.0:
; CHECK-NOLSE-NEXT:    ldar x0, [x0]
; CHECK-NOLSE-NEXT:    ret
;
; CHECK-OUTLINE-LABEL: load_ptr:
; CHECK-OUTLINE:       ; %bb.0:
; CHECK-OUTLINE-NEXT:    ldar x0, [x0]
; CHECK-OUTLINE-NEXT:    ret
;
; CHECK-LSE-O1-LABEL: load_ptr:
; CHECK-LSE-O1:       ; %bb.0:
; CHECK-LSE-O1-NEXT:    ldar x0, [x0]
; CHECK-LSE-O1-NEXT:    ret
;
; CHECK-LSE-O0-LABEL: load_ptr:
; CHECK-LSE-O0:       ; %bb.0:
; CHECK-LSE-O0-NEXT:    ldar x0, [x0]
; CHECK-LSE-O0-NEXT:    ret
  %load = load atomic ptr, ptr %ptr seq_cst, align 8
  ret ptr %load
}

; Attribute group #0: applied to function definitions that end in `#0`
; (for example @val_compare_and_swap near the top of the file).
attributes #0 = { nounwind }