; llvm/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE


target triple = "aarch64-unknown-linux-gnu"

; Streaming-mode lowering of @llvm.masked.gather.v2i64. The autogenerated
; CHECK lines below show the gather being scalarised: a lane bitmask is built
; from the icmp result (via predicated mov / uaddv for SVE, csel/csetm for the
; NONEON-NOSVE run), then each lane is loaded under a tbz/tbnz branch in the
; %cond.load / %cond.load1 blocks instead of using a hardware gather.
define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    index z0.d, #1, #1
; CHECK-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z0.d, z1.d, z0.d
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  // %bb.1: // %cond.load
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x9]
; CHECK-NEXT:    tbnz w8, #1, .LBB0_3
; CHECK-NEXT:    b .LBB0_4
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    adrp x9, .LCPI0_0
; CHECK-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; CHECK-NEXT:    tbz w8, #1, .LBB0_4
; CHECK-NEXT:  .LBB0_3: // %cond.load1
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    index z2.d, #0, #1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    mov z3.d, x8
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT:    ldr x8, [x8]
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:  .LBB0_4: // %else2
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_gather_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #144
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 144
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ldr q1, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #140]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB0_2
; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
; NONEON-NOSVE-NEXT:    str q1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [x9]
; NONEON-NOSVE-NEXT:    str x9, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #80]
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_3
; NONEON-NOSVE-NEXT:    b .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_2:
; NONEON-NOSVE-NEXT:    adrp x9, .LCPI0_0
; NONEON-NOSVE-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_3: // %cond.load1
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr x8, [x8]
; NONEON-NOSVE-NEXT:    str q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x9, [sp]
; NONEON-NOSVE-NEXT:    str x8, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:  .LBB0_4: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #144
; NONEON-NOSVE-NEXT:    ret
  ; Mask lanes are set where the loaded value is zero; masked-off lanes of the
  ; gather result come from the poison passthru operand.
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison)
  ret <2 x i64> %res
}

; Streaming-mode lowering of @llvm.masked.scatter.v2i64. As in the gather test
; above, the autogenerated CHECK lines show the scatter scalarised into
; per-lane conditional stores (%cond.store / %cond.store1) guarded by
; tbnz/tbz on an extracted lane bitmask.
define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_scatter_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    index z1.d, #1, #1
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z1.d, z2.d, z1.d
; CHECK-NEXT:    uaddv d1, p0, z1.d
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbnz w8, #0, .LBB1_3
; CHECK-NEXT:  // %bb.1: // %else
; CHECK-NEXT:    tbnz w8, #1, .LBB1_4
; CHECK-NEXT:  .LBB1_2: // %else2
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_3: // %cond.store
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    fmov x10, d1
; CHECK-NEXT:    str x9, [x10]
; CHECK-NEXT:    tbz w8, #1, .LBB1_2
; CHECK-NEXT:  .LBB1_4: // %cond.store1
; CHECK-NEXT:    mov z0.d, z0.d[1]
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    str x8, [x9]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    ldr q0, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB1_3
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_4
; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB1_3: // %cond.store
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x10, [sp, #48]
; NONEON-NOSVE-NEXT:    str x9, [x10]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
; NONEON-NOSVE-NEXT:  .LBB1_4: // %cond.store1
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT:    str x8, [x9]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  ; Store each value to its corresponding pointer only where the value is zero
  ; (the icmp-eq mask); masked-off lanes perform no store.
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)