; llvm/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE


target triple = "aarch64-unknown-linux-gnu"

;
; Masked Store
;

; Masked store of an all-zero <4 x i8> to %dst, with one <4 x i1> %mask lane
; guarding each byte.
; Expected with +sve: the mask is turned into a halfword predicate and a single
; predicated st1b performs the store.
; Expected without SVE: the mask is spilled, packed into a 4-bit value in w8,
; and each lane becomes a test-bit branch around a "strb wzr" store.
define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v4i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB0_5
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_6
; NONEON-NOSVE-NEXT:  .LBB0_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB0_7
; NONEON-NOSVE-NEXT:  .LBB0_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB0_8
; NONEON-NOSVE-NEXT:  .LBB0_4: // %else6
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB0_5: // %cond.store
; NONEON-NOSVE-NEXT:    strb wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_2
; NONEON-NOSVE-NEXT:  .LBB0_6: // %cond.store1
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #1]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB0_3
; NONEON-NOSVE-NEXT:  .LBB0_7: // %cond.store3
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_8: // %cond.store5
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #3]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
  ret void
}

; Masked store of an all-zero <8 x i8> to %dst, with one <8 x i1> %mask lane
; guarding each byte.
; Expected with +sve: the mask becomes a byte predicate (vl8) and a single
; predicated st1b performs the store.
; Expected without SVE: the eight mask bytes are spilled and packed into an
; 8-bit value, then each lane becomes a test-bit branch around "strb wzr".
define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    asr z0.b, z0.b, #7
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, #0 // =0x0
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
; NONEON-NOSVE-NEXT:    orr w8, w8, w12
; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w10, w8
; NONEON-NOSVE-NEXT:    orr w10, w11, w12
; NONEON-NOSVE-NEXT:    orr w8, w8, w10
; NONEON-NOSVE-NEXT:    and w9, w9, #0x80
; NONEON-NOSVE-NEXT:    add w9, w8, w9
; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
; NONEON-NOSVE-NEXT:    tbnz w9, #0, .LBB1_9
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_10
; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB1_11
; NONEON-NOSVE-NEXT:  .LBB1_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB1_12
; NONEON-NOSVE-NEXT:  .LBB1_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB1_13
; NONEON-NOSVE-NEXT:  .LBB1_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB1_14
; NONEON-NOSVE-NEXT:  .LBB1_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB1_15
; NONEON-NOSVE-NEXT:  .LBB1_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB1_16
; NONEON-NOSVE-NEXT:  .LBB1_8: // %else14
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB1_9: // %cond.store
; NONEON-NOSVE-NEXT:    strb wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
; NONEON-NOSVE-NEXT:  .LBB1_10: // %cond.store1
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #1]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB1_3
; NONEON-NOSVE-NEXT:  .LBB1_11: // %cond.store3
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB1_4
; NONEON-NOSVE-NEXT:  .LBB1_12: // %cond.store5
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #3]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB1_5
; NONEON-NOSVE-NEXT:  .LBB1_13: // %cond.store7
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB1_6
; NONEON-NOSVE-NEXT:  .LBB1_14: // %cond.store9
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #5]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB1_7
; NONEON-NOSVE-NEXT:  .LBB1_15: // %cond.store11
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #6]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB1_8
; NONEON-NOSVE-NEXT:  .LBB1_16: // %cond.store13
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #7]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
  ret void
}

; Masked store of an all-zero <16 x i8> to %dst, with one <16 x i1> %mask lane
; guarding each byte.
; Expected with +sve: the mask becomes a byte predicate (vl16) and a single
; predicated st1b performs the store.
; Expected without SVE: the mask is spilled and packed into w8, then each of
; the 16 lanes becomes a test-bit branch around "strb wzr".
; NOTE(review): the no-SVE expectations only load mask bytes [sp]..[sp, #7]
; yet branch on bits 0-15 of w8, so bits 8-15 appear to be copies of lane 7
; (via the #0xffffff80 mask) rather than lanes 8-15 -- confirm this lowering
; is intended before relying on these assertions.
define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    asr z0.b, z0.b, #7
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, #0 // =0x0
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
; NONEON-NOSVE-NEXT:    orr w8, w8, w12
; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w10, w8
; NONEON-NOSVE-NEXT:    orr w10, w11, w12
; NONEON-NOSVE-NEXT:    orr w8, w8, w10
; NONEON-NOSVE-NEXT:    and w9, w9, #0xffffff80
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB2_17
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB2_18
; NONEON-NOSVE-NEXT:  .LBB2_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB2_19
; NONEON-NOSVE-NEXT:  .LBB2_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB2_20
; NONEON-NOSVE-NEXT:  .LBB2_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB2_21
; NONEON-NOSVE-NEXT:  .LBB2_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB2_22
; NONEON-NOSVE-NEXT:  .LBB2_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB2_23
; NONEON-NOSVE-NEXT:  .LBB2_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB2_24
; NONEON-NOSVE-NEXT:  .LBB2_8: // %else14
; NONEON-NOSVE-NEXT:    tbnz w8, #8, .LBB2_25
; NONEON-NOSVE-NEXT:  .LBB2_9: // %else16
; NONEON-NOSVE-NEXT:    tbnz w8, #9, .LBB2_26
; NONEON-NOSVE-NEXT:  .LBB2_10: // %else18
; NONEON-NOSVE-NEXT:    tbnz w8, #10, .LBB2_27
; NONEON-NOSVE-NEXT:  .LBB2_11: // %else20
; NONEON-NOSVE-NEXT:    tbnz w8, #11, .LBB2_28
; NONEON-NOSVE-NEXT:  .LBB2_12: // %else22
; NONEON-NOSVE-NEXT:    tbnz w8, #12, .LBB2_29
; NONEON-NOSVE-NEXT:  .LBB2_13: // %else24
; NONEON-NOSVE-NEXT:    tbnz w8, #13, .LBB2_30
; NONEON-NOSVE-NEXT:  .LBB2_14: // %else26
; NONEON-NOSVE-NEXT:    tbnz w8, #14, .LBB2_31
; NONEON-NOSVE-NEXT:  .LBB2_15: // %else28
; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB2_32
; NONEON-NOSVE-NEXT:  .LBB2_16: // %else30
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB2_17: // %cond.store
; NONEON-NOSVE-NEXT:    strb wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB2_2
; NONEON-NOSVE-NEXT:  .LBB2_18: // %cond.store1
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #1]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB2_3
; NONEON-NOSVE-NEXT:  .LBB2_19: // %cond.store3
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB2_4
; NONEON-NOSVE-NEXT:  .LBB2_20: // %cond.store5
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #3]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB2_5
; NONEON-NOSVE-NEXT:  .LBB2_21: // %cond.store7
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB2_6
; NONEON-NOSVE-NEXT:  .LBB2_22: // %cond.store9
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #5]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB2_7
; NONEON-NOSVE-NEXT:  .LBB2_23: // %cond.store11
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #6]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB2_8
; NONEON-NOSVE-NEXT:  .LBB2_24: // %cond.store13
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #7]
; NONEON-NOSVE-NEXT:    tbz w8, #8, .LBB2_9
; NONEON-NOSVE-NEXT:  .LBB2_25: // %cond.store15
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #9, .LBB2_10
; NONEON-NOSVE-NEXT:  .LBB2_26: // %cond.store17
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #9]
; NONEON-NOSVE-NEXT:    tbz w8, #10, .LBB2_11
; NONEON-NOSVE-NEXT:  .LBB2_27: // %cond.store19
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #10]
; NONEON-NOSVE-NEXT:    tbz w8, #11, .LBB2_12
; NONEON-NOSVE-NEXT:  .LBB2_28: // %cond.store21
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #11]
; NONEON-NOSVE-NEXT:    tbz w8, #12, .LBB2_13
; NONEON-NOSVE-NEXT:  .LBB2_29: // %cond.store23
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #12]
; NONEON-NOSVE-NEXT:    tbz w8, #13, .LBB2_14
; NONEON-NOSVE-NEXT:  .LBB2_30: // %cond.store25
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #13]
; NONEON-NOSVE-NEXT:    tbz w8, #14, .LBB2_15
; NONEON-NOSVE-NEXT:  .LBB2_31: // %cond.store27
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #14]
; NONEON-NOSVE-NEXT:    tbz w8, #15, .LBB2_16
; NONEON-NOSVE-NEXT:  .LBB2_32: // %cond.store29
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #15]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
  ret void
}

; Masked store of an all-zero <32 x i8> to %dst, with one <32 x i1> %mask lane
; guarding each byte. In the expected code the mask lanes arrive individually
; in w1-w7 and in stack slots rather than in a vector register.
; Expected with +sve: the 32 mask bytes are assembled in a 32-byte stack area,
; reloaded as q1/q0, converted to two vl16 byte predicates, and stored with two
; predicated st1b instructions (offsets 0 and 16 from x0).
; Expected without SVE: the mask is packed into w8 and each of the 32 lanes
; becomes a test-bit branch around "strb wzr".
; NOTE(review): the no-SVE expectations read only lanes 0-7 (w1-w7, [sp]) and
; lanes 16-23 ([sp, #72]..[sp, #128]); bits 8-15 and 24-31 of w8 appear to come
; from the #0xff80 masks applied to lanes 7 and 23 rather than from their own
; lanes -- confirm this lowering is intended before relying on these assertions.
define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
; CHECK-LABEL: masked_store_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #32
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    ldr w8, [sp, #96]
; CHECK-NEXT:    ldr w9, [sp, #88]
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldr w10, [sp, #120]
; CHECK-NEXT:    strb w7, [sp, #6]
; CHECK-NEXT:    strb w8, [sp, #15]
; CHECK-NEXT:    ldr w8, [sp, #80]
; CHECK-NEXT:    strb w9, [sp, #14]
; CHECK-NEXT:    ldr w9, [sp, #72]
; CHECK-NEXT:    strb w8, [sp, #13]
; CHECK-NEXT:    ldr w8, [sp, #64]
; CHECK-NEXT:    strb w9, [sp, #12]
; CHECK-NEXT:    ldr w9, [sp, #56]
; CHECK-NEXT:    strb w8, [sp, #11]
; CHECK-NEXT:    ldr w8, [sp, #48]
; CHECK-NEXT:    strb w9, [sp, #10]
; CHECK-NEXT:    ldr w9, [sp, #40]
; CHECK-NEXT:    strb w8, [sp, #9]
; CHECK-NEXT:    ldr w8, [sp, #32]
; CHECK-NEXT:    strb w9, [sp, #8]
; CHECK-NEXT:    ldr w9, [sp, #216]
; CHECK-NEXT:    strb w8, [sp, #7]
; CHECK-NEXT:    ldr w8, [sp, #224]
; CHECK-NEXT:    strb w9, [sp, #30]
; CHECK-NEXT:    ldr w9, [sp, #200]
; CHECK-NEXT:    strb w8, [sp, #31]
; CHECK-NEXT:    ldr w8, [sp, #208]
; CHECK-NEXT:    strb w9, [sp, #28]
; CHECK-NEXT:    ldr w9, [sp, #184]
; CHECK-NEXT:    strb w8, [sp, #29]
; CHECK-NEXT:    ldr w8, [sp, #192]
; CHECK-NEXT:    strb w9, [sp, #26]
; CHECK-NEXT:    ldr w9, [sp, #168]
; CHECK-NEXT:    strb w8, [sp, #27]
; CHECK-NEXT:    ldr w8, [sp, #176]
; CHECK-NEXT:    strb w9, [sp, #24]
; CHECK-NEXT:    ldr w9, [sp, #152]
; CHECK-NEXT:    strb w8, [sp, #25]
; CHECK-NEXT:    ldr w8, [sp, #160]
; CHECK-NEXT:    strb w9, [sp, #22]
; CHECK-NEXT:    ldr w9, [sp, #136]
; CHECK-NEXT:    strb w8, [sp, #23]
; CHECK-NEXT:    ldr w8, [sp, #144]
; CHECK-NEXT:    strb w9, [sp, #20]
; CHECK-NEXT:    ldr w9, [sp, #112]
; CHECK-NEXT:    strb w8, [sp, #21]
; CHECK-NEXT:    ldr w8, [sp, #128]
; CHECK-NEXT:    strb w6, [sp, #5]
; CHECK-NEXT:    strb w8, [sp, #19]
; CHECK-NEXT:    ldr w8, [sp, #104]
; CHECK-NEXT:    strb w5, [sp, #4]
; CHECK-NEXT:    strb w4, [sp, #3]
; CHECK-NEXT:    strb w3, [sp, #2]
; CHECK-NEXT:    strb w2, [sp, #1]
; CHECK-NEXT:    strb w1, [sp]
; CHECK-NEXT:    strb w10, [sp, #18]
; CHECK-NEXT:    strb w9, [sp, #17]
; CHECK-NEXT:    strb w8, [sp, #16]
; CHECK-NEXT:    mov w8, #16 // =0x10
; CHECK-NEXT:    ldp q1, q0, [sp]
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    lsl z1.b, z1.b, #7
; CHECK-NEXT:    asr z0.b, z0.b, #7
; CHECK-NEXT:    asr z1.b, z1.b, #7
; CHECK-NEXT:    cmpne p1.b, p0/z, z0.b, #0
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    mov z0.b, #0 // =0x0
; CHECK-NEXT:    st1b { z0.b }, p1, [x0, x8]
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    add sp, sp, #32
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #88]
; NONEON-NOSVE-NEXT:    sbfx w15, w7, #0, #1
; NONEON-NOSVE-NEXT:    ldr w10, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr w12, [sp, #104]
; NONEON-NOSVE-NEXT:    ldr w11, [sp, #72]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    ldr w13, [sp, #120]
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    ldr w14, [sp, #128]
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
; NONEON-NOSVE-NEXT:    ldr w16, [sp]
; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
; NONEON-NOSVE-NEXT:    ldr w11, [sp, #112]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    and w10, w12, #0x10
; NONEON-NOSVE-NEXT:    sbfx w12, w4, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    sbfx w10, w13, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    sbfx w13, w5, #0, #1
; NONEON-NOSVE-NEXT:    and w12, w12, #0x8
; NONEON-NOSVE-NEXT:    and w9, w11, #0x20
; NONEON-NOSVE-NEXT:    and w10, w10, #0x40
; NONEON-NOSVE-NEXT:    sbfx w11, w3, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    sbfx w10, w2, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w11, #0x4
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    orr w11, w11, w12
; NONEON-NOSVE-NEXT:    and w12, w13, #0x10
; NONEON-NOSVE-NEXT:    sbfx w13, w6, #0, #1
; NONEON-NOSVE-NEXT:    bfxil w10, w1, #0, #1
; NONEON-NOSVE-NEXT:    orr w11, w11, w12
; NONEON-NOSVE-NEXT:    and w12, w13, #0x20
; NONEON-NOSVE-NEXT:    and w13, w15, #0x40
; NONEON-NOSVE-NEXT:    sbfx w15, w16, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w10, w11
; NONEON-NOSVE-NEXT:    orr w10, w12, w13
; NONEON-NOSVE-NEXT:    and w11, w14, #0xff80
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    and w10, w15, #0xff80
; NONEON-NOSVE-NEXT:    add w11, w8, w11
; NONEON-NOSVE-NEXT:    add w8, w9, w10
; NONEON-NOSVE-NEXT:    bfi w8, w11, #16, #16
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB3_33
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB3_34
; NONEON-NOSVE-NEXT:  .LBB3_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB3_35
; NONEON-NOSVE-NEXT:  .LBB3_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB3_36
; NONEON-NOSVE-NEXT:  .LBB3_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB3_37
; NONEON-NOSVE-NEXT:  .LBB3_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB3_38
; NONEON-NOSVE-NEXT:  .LBB3_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB3_39
; NONEON-NOSVE-NEXT:  .LBB3_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB3_40
; NONEON-NOSVE-NEXT:  .LBB3_8: // %else14
; NONEON-NOSVE-NEXT:    tbnz w8, #8, .LBB3_41
; NONEON-NOSVE-NEXT:  .LBB3_9: // %else16
; NONEON-NOSVE-NEXT:    tbnz w8, #9, .LBB3_42
; NONEON-NOSVE-NEXT:  .LBB3_10: // %else18
; NONEON-NOSVE-NEXT:    tbnz w8, #10, .LBB3_43
; NONEON-NOSVE-NEXT:  .LBB3_11: // %else20
; NONEON-NOSVE-NEXT:    tbnz w8, #11, .LBB3_44
; NONEON-NOSVE-NEXT:  .LBB3_12: // %else22
; NONEON-NOSVE-NEXT:    tbnz w8, #12, .LBB3_45
; NONEON-NOSVE-NEXT:  .LBB3_13: // %else24
; NONEON-NOSVE-NEXT:    tbnz w8, #13, .LBB3_46
; NONEON-NOSVE-NEXT:  .LBB3_14: // %else26
; NONEON-NOSVE-NEXT:    tbnz w8, #14, .LBB3_47
; NONEON-NOSVE-NEXT:  .LBB3_15: // %else28
; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB3_48
; NONEON-NOSVE-NEXT:  .LBB3_16: // %else30
; NONEON-NOSVE-NEXT:    tbnz w8, #16, .LBB3_49
; NONEON-NOSVE-NEXT:  .LBB3_17: // %else32
; NONEON-NOSVE-NEXT:    tbnz w8, #17, .LBB3_50
; NONEON-NOSVE-NEXT:  .LBB3_18: // %else34
; NONEON-NOSVE-NEXT:    tbnz w8, #18, .LBB3_51
; NONEON-NOSVE-NEXT:  .LBB3_19: // %else36
; NONEON-NOSVE-NEXT:    tbnz w8, #19, .LBB3_52
; NONEON-NOSVE-NEXT:  .LBB3_20: // %else38
; NONEON-NOSVE-NEXT:    tbnz w8, #20, .LBB3_53
; NONEON-NOSVE-NEXT:  .LBB3_21: // %else40
; NONEON-NOSVE-NEXT:    tbnz w8, #21, .LBB3_54
; NONEON-NOSVE-NEXT:  .LBB3_22: // %else42
; NONEON-NOSVE-NEXT:    tbnz w8, #22, .LBB3_55
; NONEON-NOSVE-NEXT:  .LBB3_23: // %else44
; NONEON-NOSVE-NEXT:    tbnz w8, #23, .LBB3_56
; NONEON-NOSVE-NEXT:  .LBB3_24: // %else46
; NONEON-NOSVE-NEXT:    tbnz w8, #24, .LBB3_57
; NONEON-NOSVE-NEXT:  .LBB3_25: // %else48
; NONEON-NOSVE-NEXT:    tbnz w8, #25, .LBB3_58
; NONEON-NOSVE-NEXT:  .LBB3_26: // %else50
; NONEON-NOSVE-NEXT:    tbnz w8, #26, .LBB3_59
; NONEON-NOSVE-NEXT:  .LBB3_27: // %else52
; NONEON-NOSVE-NEXT:    tbnz w8, #27, .LBB3_60
; NONEON-NOSVE-NEXT:  .LBB3_28: // %else54
; NONEON-NOSVE-NEXT:    tbnz w8, #28, .LBB3_61
; NONEON-NOSVE-NEXT:  .LBB3_29: // %else56
; NONEON-NOSVE-NEXT:    tbnz w8, #29, .LBB3_62
; NONEON-NOSVE-NEXT:  .LBB3_30: // %else58
; NONEON-NOSVE-NEXT:    tbnz w8, #30, .LBB3_63
; NONEON-NOSVE-NEXT:  .LBB3_31: // %else60
; NONEON-NOSVE-NEXT:    tbnz w8, #31, .LBB3_64
; NONEON-NOSVE-NEXT:  .LBB3_32: // %else62
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB3_33: // %cond.store
; NONEON-NOSVE-NEXT:    strb wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB3_2
; NONEON-NOSVE-NEXT:  .LBB3_34: // %cond.store1
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #1]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB3_3
; NONEON-NOSVE-NEXT:  .LBB3_35: // %cond.store3
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB3_4
; NONEON-NOSVE-NEXT:  .LBB3_36: // %cond.store5
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #3]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB3_5
; NONEON-NOSVE-NEXT:  .LBB3_37: // %cond.store7
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB3_6
; NONEON-NOSVE-NEXT:  .LBB3_38: // %cond.store9
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #5]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB3_7
; NONEON-NOSVE-NEXT:  .LBB3_39: // %cond.store11
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #6]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB3_8
; NONEON-NOSVE-NEXT:  .LBB3_40: // %cond.store13
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #7]
; NONEON-NOSVE-NEXT:    tbz w8, #8, .LBB3_9
; NONEON-NOSVE-NEXT:  .LBB3_41: // %cond.store15
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #9, .LBB3_10
; NONEON-NOSVE-NEXT:  .LBB3_42: // %cond.store17
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #9]
; NONEON-NOSVE-NEXT:    tbz w8, #10, .LBB3_11
; NONEON-NOSVE-NEXT:  .LBB3_43: // %cond.store19
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #10]
; NONEON-NOSVE-NEXT:    tbz w8, #11, .LBB3_12
; NONEON-NOSVE-NEXT:  .LBB3_44: // %cond.store21
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #11]
; NONEON-NOSVE-NEXT:    tbz w8, #12, .LBB3_13
; NONEON-NOSVE-NEXT:  .LBB3_45: // %cond.store23
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #12]
; NONEON-NOSVE-NEXT:    tbz w8, #13, .LBB3_14
; NONEON-NOSVE-NEXT:  .LBB3_46: // %cond.store25
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #13]
; NONEON-NOSVE-NEXT:    tbz w8, #14, .LBB3_15
; NONEON-NOSVE-NEXT:  .LBB3_47: // %cond.store27
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #14]
; NONEON-NOSVE-NEXT:    tbz w8, #15, .LBB3_16
; NONEON-NOSVE-NEXT:  .LBB3_48: // %cond.store29
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #15]
; NONEON-NOSVE-NEXT:    tbz w8, #16, .LBB3_17
; NONEON-NOSVE-NEXT:  .LBB3_49: // %cond.store31
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #16]
; NONEON-NOSVE-NEXT:    tbz w8, #17, .LBB3_18
; NONEON-NOSVE-NEXT:  .LBB3_50: // %cond.store33
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #17]
; NONEON-NOSVE-NEXT:    tbz w8, #18, .LBB3_19
; NONEON-NOSVE-NEXT:  .LBB3_51: // %cond.store35
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #18]
; NONEON-NOSVE-NEXT:    tbz w8, #19, .LBB3_20
; NONEON-NOSVE-NEXT:  .LBB3_52: // %cond.store37
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #19]
; NONEON-NOSVE-NEXT:    tbz w8, #20, .LBB3_21
; NONEON-NOSVE-NEXT:  .LBB3_53: // %cond.store39
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #20]
; NONEON-NOSVE-NEXT:    tbz w8, #21, .LBB3_22
; NONEON-NOSVE-NEXT:  .LBB3_54: // %cond.store41
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #21]
; NONEON-NOSVE-NEXT:    tbz w8, #22, .LBB3_23
; NONEON-NOSVE-NEXT:  .LBB3_55: // %cond.store43
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #22]
; NONEON-NOSVE-NEXT:    tbz w8, #23, .LBB3_24
; NONEON-NOSVE-NEXT:  .LBB3_56: // %cond.store45
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #23]
; NONEON-NOSVE-NEXT:    tbz w8, #24, .LBB3_25
; NONEON-NOSVE-NEXT:  .LBB3_57: // %cond.store47
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #24]
; NONEON-NOSVE-NEXT:    tbz w8, #25, .LBB3_26
; NONEON-NOSVE-NEXT:  .LBB3_58: // %cond.store49
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #25]
; NONEON-NOSVE-NEXT:    tbz w8, #26, .LBB3_27
; NONEON-NOSVE-NEXT:  .LBB3_59: // %cond.store51
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #26]
; NONEON-NOSVE-NEXT:    tbz w8, #27, .LBB3_28
; NONEON-NOSVE-NEXT:  .LBB3_60: // %cond.store53
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #27]
; NONEON-NOSVE-NEXT:    tbz w8, #28, .LBB3_29
; NONEON-NOSVE-NEXT:  .LBB3_61: // %cond.store55
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #28]
; NONEON-NOSVE-NEXT:    tbz w8, #29, .LBB3_30
; NONEON-NOSVE-NEXT:  .LBB3_62: // %cond.store57
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #29]
; NONEON-NOSVE-NEXT:    tbz w8, #30, .LBB3_31
; NONEON-NOSVE-NEXT:  .LBB3_63: // %cond.store59
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #30]
; NONEON-NOSVE-NEXT:    tbz w8, #31, .LBB3_32
; NONEON-NOSVE-NEXT:  .LBB3_64: // %cond.store61
; NONEON-NOSVE-NEXT:    strb wzr, [x0, #31]
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask)
  ret void
}

; Masked store of an all-zero <2 x half> to %dst, with one <2 x i1> %mask lane
; guarding each element.
; Expected with +sve: the two mask lanes are rebuilt through the stack as a
; 4 x halfword vector whose upper two lanes are zero (the "str wzr" at
; [sp, #12]), then a vl4 halfword predicate drives a single st1h.
; Expected without SVE: the mask is packed into two bits of w8 and each lane
; becomes a test-bit branch around a "str h0" of a zeroed register.
define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z1.s, z0.s[1]
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    str wzr, [sp, #12]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    strh w8, [sp, #8]
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    strh w8, [sp, #10]
; CHECK-NEXT:    ldr d0, [sp, #8]
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v2f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    bfxil w8, w9, #0, #1
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB4_3
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB4_4
; NONEON-NOSVE-NEXT:  .LBB4_2: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB4_3: // %cond.store
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB4_2
; NONEON-NOSVE-NEXT:  .LBB4_4: // %cond.store1
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #2]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
  ret void
}

; Masked store of an all-zero <4 x half> to %dst, with one <4 x i1> %mask lane
; guarding each element.
; Expected with +sve: the mask becomes a vl4 halfword predicate and a single
; predicated st1h performs the store.
; Expected without SVE: the mask is spilled, packed into a 4-bit value in w8,
; and each lane becomes a test-bit branch around a "str h0" of a zeroed
; register.
define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB5_5
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB5_6
; NONEON-NOSVE-NEXT:  .LBB5_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB5_7
; NONEON-NOSVE-NEXT:  .LBB5_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB5_8
; NONEON-NOSVE-NEXT:  .LBB5_4: // %else6
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB5_5: // %cond.store
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB5_2
; NONEON-NOSVE-NEXT:  .LBB5_6: // %cond.store1
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB5_3
; NONEON-NOSVE-NEXT:  .LBB5_7: // %cond.store3
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB5_4
; NONEON-NOSVE-NEXT:  .LBB5_8: // %cond.store5
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #6]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
  ret void
}

; Masked store of an all-zero <8 x half> to %dst, with one <8 x i1> %mask lane
; guarding each element.
; Expected with +sve: the byte-sized mask lanes are widened to halfwords with
; uunpklo, turned into a vl8 halfword predicate, and a single predicated st1h
; performs the store.
; Expected without SVE: the eight mask bytes are spilled and packed into an
; 8-bit value, then each lane becomes a test-bit branch around a "str h0" of a
; zeroed register.
define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    uunpklo z0.h, z0.b
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
; NONEON-NOSVE-NEXT:    orr w8, w8, w12
; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w10, w8
; NONEON-NOSVE-NEXT:    orr w10, w11, w12
; NONEON-NOSVE-NEXT:    orr w8, w8, w10
; NONEON-NOSVE-NEXT:    and w9, w9, #0x80
; NONEON-NOSVE-NEXT:    add w9, w8, w9
; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
; NONEON-NOSVE-NEXT:    tbnz w9, #0, .LBB6_9
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB6_10
; NONEON-NOSVE-NEXT:  .LBB6_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB6_11
; NONEON-NOSVE-NEXT:  .LBB6_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB6_12
; NONEON-NOSVE-NEXT:  .LBB6_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB6_13
; NONEON-NOSVE-NEXT:  .LBB6_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB6_14
; NONEON-NOSVE-NEXT:  .LBB6_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB6_15
; NONEON-NOSVE-NEXT:  .LBB6_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB6_16
; NONEON-NOSVE-NEXT:  .LBB6_8: // %else14
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB6_9: // %cond.store
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB6_2
; NONEON-NOSVE-NEXT:  .LBB6_10: // %cond.store1
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB6_3
; NONEON-NOSVE-NEXT:  .LBB6_11: // %cond.store3
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB6_4
; NONEON-NOSVE-NEXT:  .LBB6_12: // %cond.store5
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #6]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB6_5
; NONEON-NOSVE-NEXT:  .LBB6_13: // %cond.store7
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB6_6
; NONEON-NOSVE-NEXT:  .LBB6_14: // %cond.store9
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #10]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB6_7
; NONEON-NOSVE-NEXT:  .LBB6_15: // %cond.store11
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #12]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB6_8
; NONEON-NOSVE-NEXT:  .LBB6_16: // %cond.store13
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #14]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  ; Operands: value to store (all zeros), destination, alignment (i32 8), mask.
  call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
  ret void
}

; Masked store of an all-zero <16 x half> vector to %dst under a <16 x i1> mask.
; SVE run: the mask is unpacked into two 8-lane halves, each half is turned
; into a predicate (lsl/asr #15 then cmpne against a vl8 ptrue), and the zero
; vector is written with two predicated st1h; the upper half is addressed as
; [x0, x8, lsl #1] with x8 = 8.
; Non-SVE run: the mask is spilled to the stack, packed into w8, and the store
; is expanded into one tbnz/tbz-guarded `str h0` per lane (offsets #0..#30).
; NOTE(review): the non-SVE expectations only load bytes [sp]..[sp, #7] of the
; spilled q0 when building w8, yet bits #8..#15 of w8 are tested afterwards --
; confirm this generated sequence is the intended lowering.
define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uunpklo z1.h, z0.b
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT:    uunpklo z0.h, z0.b
; CHECK-NEXT:    lsl z1.h, z1.h, #15
; CHECK-NEXT:    asr z1.h, z1.h, #15
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    asr z0.h, z0.h, #15
; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NEXT:    st1h { z0.h }, p1, [x0, x8, lsl #1]
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
; NONEON-NOSVE-NEXT:    orr w8, w8, w12
; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w10, w8
; NONEON-NOSVE-NEXT:    orr w10, w11, w12
; NONEON-NOSVE-NEXT:    orr w8, w8, w10
; NONEON-NOSVE-NEXT:    and w9, w9, #0xffffff80
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB7_17
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB7_18
; NONEON-NOSVE-NEXT:  .LBB7_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB7_19
; NONEON-NOSVE-NEXT:  .LBB7_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB7_20
; NONEON-NOSVE-NEXT:  .LBB7_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB7_21
; NONEON-NOSVE-NEXT:  .LBB7_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB7_22
; NONEON-NOSVE-NEXT:  .LBB7_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB7_23
; NONEON-NOSVE-NEXT:  .LBB7_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB7_24
; NONEON-NOSVE-NEXT:  .LBB7_8: // %else14
; NONEON-NOSVE-NEXT:    tbnz w8, #8, .LBB7_25
; NONEON-NOSVE-NEXT:  .LBB7_9: // %else16
; NONEON-NOSVE-NEXT:    tbnz w8, #9, .LBB7_26
; NONEON-NOSVE-NEXT:  .LBB7_10: // %else18
; NONEON-NOSVE-NEXT:    tbnz w8, #10, .LBB7_27
; NONEON-NOSVE-NEXT:  .LBB7_11: // %else20
; NONEON-NOSVE-NEXT:    tbnz w8, #11, .LBB7_28
; NONEON-NOSVE-NEXT:  .LBB7_12: // %else22
; NONEON-NOSVE-NEXT:    tbnz w8, #12, .LBB7_29
; NONEON-NOSVE-NEXT:  .LBB7_13: // %else24
; NONEON-NOSVE-NEXT:    tbnz w8, #13, .LBB7_30
; NONEON-NOSVE-NEXT:  .LBB7_14: // %else26
; NONEON-NOSVE-NEXT:    tbnz w8, #14, .LBB7_31
; NONEON-NOSVE-NEXT:  .LBB7_15: // %else28
; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB7_32
; NONEON-NOSVE-NEXT:  .LBB7_16: // %else30
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB7_17: // %cond.store
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB7_2
; NONEON-NOSVE-NEXT:  .LBB7_18: // %cond.store1
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #2]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB7_3
; NONEON-NOSVE-NEXT:  .LBB7_19: // %cond.store3
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB7_4
; NONEON-NOSVE-NEXT:  .LBB7_20: // %cond.store5
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #6]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB7_5
; NONEON-NOSVE-NEXT:  .LBB7_21: // %cond.store7
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB7_6
; NONEON-NOSVE-NEXT:  .LBB7_22: // %cond.store9
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #10]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB7_7
; NONEON-NOSVE-NEXT:  .LBB7_23: // %cond.store11
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #12]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB7_8
; NONEON-NOSVE-NEXT:  .LBB7_24: // %cond.store13
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #14]
; NONEON-NOSVE-NEXT:    tbz w8, #8, .LBB7_9
; NONEON-NOSVE-NEXT:  .LBB7_25: // %cond.store15
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #16]
; NONEON-NOSVE-NEXT:    tbz w8, #9, .LBB7_10
; NONEON-NOSVE-NEXT:  .LBB7_26: // %cond.store17
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #18]
; NONEON-NOSVE-NEXT:    tbz w8, #10, .LBB7_11
; NONEON-NOSVE-NEXT:  .LBB7_27: // %cond.store19
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #20]
; NONEON-NOSVE-NEXT:    tbz w8, #11, .LBB7_12
; NONEON-NOSVE-NEXT:  .LBB7_28: // %cond.store21
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #22]
; NONEON-NOSVE-NEXT:    tbz w8, #12, .LBB7_13
; NONEON-NOSVE-NEXT:  .LBB7_29: // %cond.store23
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #24]
; NONEON-NOSVE-NEXT:    tbz w8, #13, .LBB7_14
; NONEON-NOSVE-NEXT:  .LBB7_30: // %cond.store25
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #26]
; NONEON-NOSVE-NEXT:    tbz w8, #14, .LBB7_15
; NONEON-NOSVE-NEXT:  .LBB7_31: // %cond.store27
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #28]
; NONEON-NOSVE-NEXT:    tbz w8, #15, .LBB7_16
; NONEON-NOSVE-NEXT:  .LBB7_32: // %cond.store29
; NONEON-NOSVE-NEXT:    fmov s0, wzr
; NONEON-NOSVE-NEXT:    str h0, [x0, #30]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
  ret void
}

; Masked store of an all-zero <4 x float> vector to %dst under a <4 x i1> mask.
; SVE run: the mask is widened to .s lanes with uunpklo, bit 0 of each lane is
; sign-extended (lsl/asr #31), cmpne against a vl4 ptrue forms the predicate,
; and a single predicated st1w writes the zero vector.
; Non-SVE run: the mask is spilled to the stack, the four lane bits are packed
; into w8, and each lane becomes a tbnz/tbz-guarded `str wzr` at offsets
; #0, #4, #8 and #12.
define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    lsl z0.s, z0.s, #31
; CHECK-NEXT:    asr z0.s, z0.s, #31
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, #0 // =0x0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB8_5
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB8_6
; NONEON-NOSVE-NEXT:  .LBB8_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB8_7
; NONEON-NOSVE-NEXT:  .LBB8_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB8_8
; NONEON-NOSVE-NEXT:  .LBB8_4: // %else6
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB8_5: // %cond.store
; NONEON-NOSVE-NEXT:    str wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB8_2
; NONEON-NOSVE-NEXT:  .LBB8_6: // %cond.store1
; NONEON-NOSVE-NEXT:    str wzr, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB8_3
; NONEON-NOSVE-NEXT:  .LBB8_7: // %cond.store3
; NONEON-NOSVE-NEXT:    str wzr, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB8_4
; NONEON-NOSVE-NEXT:  .LBB8_8: // %cond.store5
; NONEON-NOSVE-NEXT:    str wzr, [x0, #12]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
  ret void
}

; Masked store of an all-zero <8 x float> vector to %dst under an <8 x i1> mask.
; SVE run: the expected code extracts the eight mask bytes one at a time and
; rebuilds them as two 4 x 16-bit halves in a 16-byte stack slot ([sp, #8] for
; lanes 4..7, [sp] for lanes 0..3); each half is reloaded, widened to .s lanes
; and converted to a predicate, giving two predicated st1w (the upper half is
; addressed as [x0, x8, lsl #2] with x8 = 4).
; Non-SVE run: the mask is spilled to the stack, the eight lane bits are packed
; into w8, and each lane becomes a tbnz/tbz-guarded `str wzr` at offsets
; #0..#28.
define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z1.b, z0.b[7]
; CHECK-NEXT:    mov z2.b, z0.b[6]
; CHECK-NEXT:    mov z3.b, z0.b[5]
; CHECK-NEXT:    mov z4.b, z0.b[4]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s2
; CHECK-NEXT:    mov z2.b, z0.b[3]
; CHECK-NEXT:    strh w8, [sp, #14]
; CHECK-NEXT:    fmov w8, s3
; CHECK-NEXT:    mov z3.b, z0.b[2]
; CHECK-NEXT:    strh w9, [sp, #12]
; CHECK-NEXT:    fmov w9, s4
; CHECK-NEXT:    mov z4.b, z0.b[1]
; CHECK-NEXT:    strh w8, [sp, #10]
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    strh w9, [sp, #8]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    ldr d1, [sp, #8]
; CHECK-NEXT:    uunpklo z1.s, z1.h
; CHECK-NEXT:    lsl z1.s, z1.s, #31
; CHECK-NEXT:    asr z1.s, z1.s, #31
; CHECK-NEXT:    cmpne p1.s, p0/z, z1.s, #0
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    st1w { z1.s }, p1, [x0, x8, lsl #2]
; CHECK-NEXT:    fmov w8, s2
; CHECK-NEXT:    strh w9, [sp]
; CHECK-NEXT:    strh w8, [sp, #6]
; CHECK-NEXT:    fmov w8, s3
; CHECK-NEXT:    strh w8, [sp, #4]
; CHECK-NEXT:    fmov w8, s4
; CHECK-NEXT:    strh w8, [sp, #2]
; CHECK-NEXT:    ldr d0, [sp]
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    lsl z0.s, z0.s, #31
; CHECK-NEXT:    asr z0.s, z0.s, #31
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
; NONEON-NOSVE-NEXT:    orr w8, w8, w12
; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    orr w8, w10, w8
; NONEON-NOSVE-NEXT:    orr w10, w11, w12
; NONEON-NOSVE-NEXT:    orr w8, w8, w10
; NONEON-NOSVE-NEXT:    and w9, w9, #0x80
; NONEON-NOSVE-NEXT:    add w9, w8, w9
; NONEON-NOSVE-NEXT:    and w8, w9, #0xff
; NONEON-NOSVE-NEXT:    tbnz w9, #0, .LBB9_9
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB9_10
; NONEON-NOSVE-NEXT:  .LBB9_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB9_11
; NONEON-NOSVE-NEXT:  .LBB9_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB9_12
; NONEON-NOSVE-NEXT:  .LBB9_4: // %else6
; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB9_13
; NONEON-NOSVE-NEXT:  .LBB9_5: // %else8
; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB9_14
; NONEON-NOSVE-NEXT:  .LBB9_6: // %else10
; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB9_15
; NONEON-NOSVE-NEXT:  .LBB9_7: // %else12
; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB9_16
; NONEON-NOSVE-NEXT:  .LBB9_8: // %else14
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB9_9: // %cond.store
; NONEON-NOSVE-NEXT:    str wzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB9_2
; NONEON-NOSVE-NEXT:  .LBB9_10: // %cond.store1
; NONEON-NOSVE-NEXT:    str wzr, [x0, #4]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB9_3
; NONEON-NOSVE-NEXT:  .LBB9_11: // %cond.store3
; NONEON-NOSVE-NEXT:    str wzr, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB9_4
; NONEON-NOSVE-NEXT:  .LBB9_12: // %cond.store5
; NONEON-NOSVE-NEXT:    str wzr, [x0, #12]
; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB9_5
; NONEON-NOSVE-NEXT:  .LBB9_13: // %cond.store7
; NONEON-NOSVE-NEXT:    str wzr, [x0, #16]
; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB9_6
; NONEON-NOSVE-NEXT:  .LBB9_14: // %cond.store9
; NONEON-NOSVE-NEXT:    str wzr, [x0, #20]
; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB9_7
; NONEON-NOSVE-NEXT:  .LBB9_15: // %cond.store11
; NONEON-NOSVE-NEXT:    str wzr, [x0, #24]
; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB9_8
; NONEON-NOSVE-NEXT:  .LBB9_16: // %cond.store13
; NONEON-NOSVE-NEXT:    str wzr, [x0, #28]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
  ret void
}

; Masked store of an all-zero <2 x double> vector to %dst under a <2 x i1> mask.
; SVE run: the mask is widened to .d lanes with uunpklo, bit 0 of each lane is
; sign-extended (lsl/asr #63), cmpne against a vl2 ptrue forms the predicate,
; and a single predicated st1d writes the zero vector.
; Non-SVE run: the mask is spilled to the stack, the two lane bits are packed
; into w8, and each lane becomes a tbnz/tbz-guarded `str xzr` at offsets
; #0 and #8.
define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    lsl z0.d, z0.d, #63
; CHECK-NEXT:    asr z0.d, z0.d, #63
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    mov z0.d, #0 // =0x0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    bfxil w8, w9, #0, #1
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB10_3
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB10_4
; NONEON-NOSVE-NEXT:  .LBB10_2: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB10_3: // %cond.store
; NONEON-NOSVE-NEXT:    str xzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB10_2
; NONEON-NOSVE-NEXT:  .LBB10_4: // %cond.store1
; NONEON-NOSVE-NEXT:    str xzr, [x0, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
  ret void
}

; Masked store of an all-zero <4 x double> vector to %dst under a <4 x i1> mask.
; SVE run: the mask is widened to .s lanes, split into two 2-lane halves that
; are widened again to .d lanes, and each half is converted to a predicate
; (lsl/asr #63 then cmpne against a vl2 ptrue); the zero vector is written with
; two predicated st1d, the upper half addressed as [x0, x8, lsl #3] with x8 = 2.
; Non-SVE run: the mask is spilled to the stack, the four lane bits are packed
; into w8, and each lane becomes a tbnz/tbz-guarded `str xzr` at offsets
; #0, #8, #16 and #24.
define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    lsl z1.d, z1.d, #63
; CHECK-NEXT:    lsl z0.d, z0.d, #63
; CHECK-NEXT:    asr z1.d, z1.d, #63
; CHECK-NEXT:    asr z0.d, z0.d, #63
; CHECK-NEXT:    cmpne p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    mov z0.d, #0 // =0x0
; CHECK-NEXT:    st1d { z0.d }, p1, [x0, x8, lsl #3]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_store_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp]
; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
; NONEON-NOSVE-NEXT:    orr w9, w9, w10
; NONEON-NOSVE-NEXT:    orr w8, w8, w9
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB11_5
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB11_6
; NONEON-NOSVE-NEXT:  .LBB11_2: // %else2
; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB11_7
; NONEON-NOSVE-NEXT:  .LBB11_3: // %else4
; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB11_8
; NONEON-NOSVE-NEXT:  .LBB11_4: // %else6
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB11_5: // %cond.store
; NONEON-NOSVE-NEXT:    str xzr, [x0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB11_2
; NONEON-NOSVE-NEXT:  .LBB11_6: // %cond.store1
; NONEON-NOSVE-NEXT:    str xzr, [x0, #8]
; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB11_3
; NONEON-NOSVE-NEXT:  .LBB11_7: // %cond.store3
; NONEON-NOSVE-NEXT:    str xzr, [x0, #16]
; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB11_4
; NONEON-NOSVE-NEXT:  .LBB11_8: // %cond.store5
; NONEON-NOSVE-NEXT:    str xzr, [x0, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
  ret void
}

; Declarations of the llvm.masked.store intrinsic overloads called by the test
; functions in this file. Every overload takes, in order: the vector value to
; store, the destination pointer, an i32 operand (the alignment, per the LLVM
; Language Reference) and an <N x i1> mask with one bit per vector lane.
declare void @llvm.masked.store.v4i8(<4 x i8>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8i8(<8 x i8>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>)
declare void @llvm.masked.store.v2f16(<2 x half>, ptr, i32, <2 x i1>)
declare void @llvm.masked.store.v4f16(<4 x half>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8f16(<8 x half>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v16f16(<16 x half>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8f32(<8 x float>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v2f64(<2 x double>, ptr, i32, <2 x i1>)
declare void @llvm.masked.store.v4f64(<4 x double>, ptr, i32, <4 x i1>)