llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s -check-prefix=USE_SCALAR_INC
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s -check-prefix=USE_SCALAR_INC
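; The three USE_SCALAR_INC runs cover the configurations that select the
; scalar INC/DEC instructions: +use-scalar-inc-vl enables them explicitly,
; and the same codegen is expected under +sve2 and streaming SME.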

;
; CNTB
;
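; The immediate operand of the cnt* intrinsics selects an SVE predicate
; pattern: values 1-8 encode vl1-vl8, 9-13 encode vl16/vl32/vl64/vl128/vl256,
; and 31 encodes "all"; unallocated values print as a plain immediate.
; Constant multipliers from 1 to 16 can fold into the instruction's "mul"
; operand.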

define i64 @cntb() {
; CHECK-LABEL: cntb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0, vl2
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntb:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntb x0, vl2
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntb(i32 2)
  ret i64 %out
}

define i64 @cntb_mul3() {
; CHECK-LABEL: cntb_mul3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0, vl6, mul #3
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntb_mul3:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntb x0, vl6, mul #3
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
  %out = mul i64 %cnt, 3
  ret i64 %out
}

define i64 @cntb_mul4() {
; CHECK-LABEL: cntb_mul4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0, vl8, mul #4
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntb_mul4:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntb x0, vl8, mul #4
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
  %out = mul i64 %cnt, 4
  ret i64 %out
}

;
; CNTH
;

define i64 @cnth() {
; CHECK-LABEL: cnth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0, vl3
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cnth:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cnth x0, vl3
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cnth(i32 3)
  ret i64 %out
}

define i64 @cnth_mul5() {
; CHECK-LABEL: cnth_mul5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0, vl7, mul #5
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cnth_mul5:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cnth x0, vl7, mul #5
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 7)
  %out = mul i64 %cnt, 5
  ret i64 %out
}

define i64 @cnth_mul8() {
; CHECK-LABEL: cnth_mul8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0, vl5, mul #8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cnth_mul8:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cnth x0, vl5, mul #8
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 5)
  %out = mul i64 %cnt, 8
  ret i64 %out
}

;
; CNTW
;

define i64 @cntw() {
; CHECK-LABEL: cntw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0, vl4
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntw:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntw x0, vl4
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntw(i32 4)
  ret i64 %out
}

define i64 @cntw_mul11() {
; CHECK-LABEL: cntw_mul11:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0, vl8, mul #11
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntw_mul11:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntw x0, vl8, mul #11
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 8)
  %out = mul i64 %cnt, 11
  ret i64 %out
}

define i64 @cntw_mul2() {
; CHECK-LABEL: cntw_mul2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0, vl6, mul #2
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntw_mul2:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntw x0, vl6, mul #2
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 6)
  %out = mul i64 %cnt, 2
  ret i64 %out
}

;
; CNTD
;
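; Note the non-contiguous encodings below: patterns 9 and 10 select vl16 and
; vl32 respectively.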

define i64 @cntd() {
; CHECK-LABEL: cntd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0, vl5
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntd:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntd x0, vl5
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntd(i32 5)
  ret i64 %out
}

define i64 @cntd_mul15() {
; CHECK-LABEL: cntd_mul15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0, vl16, mul #15
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntd_mul15:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntd x0, vl16, mul #15
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 9)
  %out = mul i64 %cnt, 15
  ret i64 %out
}

define i64 @cntd_mul16() {
; CHECK-LABEL: cntd_mul16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0, vl32, mul #16
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntd_mul16:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntd x0, vl32, mul #16
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 10)
  %out = mul i64 %cnt, 16
  ret i64 %out
}

;
; CNTP
;
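; CNTP counts the active elements of the second predicate under the governing
; predicate, so codegen is identical with and without +use-scalar-inc-vl.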

define i64 @cntp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: cntp_b8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x0, p0, p1.b
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntp_b8:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.b
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i1> %a)
  ret i64 %out
}

define i64 @cntp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
; CHECK-LABEL: cntp_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x0, p0, p1.h
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntp_b16:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.h
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg,
                                                <vscale x 8 x i1> %a)
  ret i64 %out
}

define i64 @cntp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
; CHECK-LABEL: cntp_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x0, p0, p1.s
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntp_b32:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.s
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg,
                                                <vscale x 4 x i1> %a)
  ret i64 %out
}

define i64 @cntp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
; CHECK-LABEL: cntp_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x0, p0, p1.d
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: cntp_b64:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.d
; USE_SCALAR_INC-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg,
                                                <vscale x 2 x i1> %a)
  ret i64 %out
}

;
; INCB
;
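; With scalar INC/DEC available, the count-and-add folds into a single incb
; of the scalar register. Otherwise the count is materialised with cntb and
; combined with an add/sub, where a power-of-two multiplier folds into the
; shifted operand (lsl) rather than the cnt instruction's mul.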

define i64 @incb(i64 %a) {
; CHECK-LABEL: incb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8, vl5
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incb:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incb x0, vl5
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
  %out = add i64 %cnt, %a
  ret i64 %out
}

define i64 @incb_mul(i64 %a) {
; CHECK-LABEL: incb_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8, vl4
; CHECK-NEXT:    add x0, x0, x8, lsl #2
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incb_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incb x0, vl4, mul #4
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 4)
  %mul = mul i64 %cnt, 4
  %out = add i64 %mul, %a
  ret i64 %out
}

;
; DECB
;

define i64 @decb(i64 %a) {
; CHECK-LABEL: decb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8, vl6
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decb:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decb x0, vl6
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
  %out = sub i64 %a, %cnt
  ret i64 %out
}

define i64 @decb_mul(i64 %a) {
; CHECK-LABEL: decb_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8, vl7
; CHECK-NEXT:    sub x0, x0, x8, lsl #3
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decb_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decb x0, vl7, mul #8
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 7)
  %mul = mul i64 %cnt, 8
  %out = sub i64 %a, %mul
  ret i64 %out
}

;
; INCH
;

define i64 @inch(i64 %a) {
; CHECK-LABEL: inch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8, vl4
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: inch:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    inch x0, vl4
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 4)
  %out = add i64 %cnt, %a
  ret i64 %out
}

define i64 @inch_mul(i64 %a) {
; CHECK-LABEL: inch_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8, vl8, mul #5
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: inch_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    inch x0, vl8, mul #5
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
  %mul = mul i64 %cnt, 5
  %out = add i64 %mul, %a
  ret i64 %out
}

;
; DECH
;

define i64 @dech(i64 %a) {
; CHECK-LABEL: dech:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8, vl1
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: dech:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    dech x0, vl1
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
  %out = sub i64 %a, %cnt
  ret i64 %out
}

define i64 @dech_mul(i64 %a) {
; CHECK-LABEL: dech_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8, vl16, mul #7
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: dech_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    dech x0, vl16, mul #7
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
  %mul = mul i64 %cnt, 7
  %out = sub i64 %a, %mul
  ret i64 %out
}

;
; INCW
;
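; Pattern value 16 is an unallocated encoding and prints as a plain #16.
; Pattern 31 ("all") is the assembler default and is omitted entirely when
; the multiplier is 1 (see decw below).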

define i64 @incw(i64 %a) {
; CHECK-LABEL: incw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8, #16
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incw:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incw x0, #16
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
  %out = add i64 %cnt, %a
  ret i64 %out
}

define i64 @incw_mul(i64 %a) {
; CHECK-LABEL: incw_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8, vl32, mul #12
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incw_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incw x0, vl32, mul #12
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
  %mul = mul i64 %cnt, 12
  %out = add i64 %mul, %a
  ret i64 %out
}

;
; DECW
;
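; The power-of-two multiplier (16 = 1 << 4) folds into the shifted sub
; operand rather than the cntw mul immediate.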

define i64 @decw(i64 %a) {
; CHECK-LABEL: decw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decw:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decw x0
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %out = sub i64 %a, %cnt
  ret i64 %out
}

define i64 @decw_mul(i64 %a) {
; CHECK-LABEL: decw_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8, vl128
; CHECK-NEXT:    sub x0, x0, x8, lsl #4
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decw_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decw x0, vl128, mul #16
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
  %mul = mul i64 %cnt, 16
  %out = sub i64 %a, %mul
  ret i64 %out
}

;
; INCD
;

define i64 @incd(i64 %a) {
; CHECK-LABEL: incd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8, vl8
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incd:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incd x0, vl8
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 8)
  %out = add i64 %cnt, %a
  ret i64 %out
}

define i64 @incd_mul(i64 %a) {
; CHECK-LABEL: incd_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8, all, mul #15
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: incd_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    incd x0, all, mul #15
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %mul = mul i64 %cnt, 15
  %out = add i64 %mul, %a
  ret i64 %out
}

;
; DECD
;
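; A non-power-of-two multiplier (9) cannot use a shifted sub operand, so it
; stays in the cntd mul immediate when scalar DEC is not used.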

define i64 @decd(i64 %a) {
; CHECK-LABEL: decd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8, #16
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decd:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decd x0, #16
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
  %out = sub i64 %a, %cnt
  ret i64 %out
}

define i64 @decd_mul(i64 %a) {
; CHECK-LABEL: decd_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8, vl2, mul #9
; CHECK-NEXT:    sub x0, x0, x8
; CHECK-NEXT:    ret
;
; USE_SCALAR_INC-LABEL: decd_mul:
; USE_SCALAR_INC:       // %bb.0:
; USE_SCALAR_INC-NEXT:    decd x0, vl2, mul #9
; USE_SCALAR_INC-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
  %mul = mul i64 %cnt, 9
  %out = sub i64 %a, %mul
  ret i64 %out
}

declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)

declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)