; llvm/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts.ll

; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s

; FIXED WIDTH

; ctz_v8i1: calls llvm.experimental.cttz.elts on an <8 x i1> mask with the
; second operand set to i1 0, and returns the result as i8.
;
; The expected lowering uses a constant-pool table holding 8..1 (lane 0 maps
; to 8, lane 7 maps to 1):
;   * shl #7 followed by cmlt #0 turns each i1 lane into an all-ones or
;     all-zero byte,
;   * the AND keeps the table entry only for lanes that are set,
;   * umaxv selects the largest surviving entry,
;   * 8 minus that value is the index of the first set lane (8 when no lane
;     is set, because the maximum is then 0).
define i8 @ctz_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT:   .byte 8
; CHECK-NEXT:   .byte 7
; CHECK-NEXT:   .byte 6
; CHECK-NEXT:   .byte 5
; CHECK-NEXT:   .byte 4
; CHECK-NEXT:   .byte 3
; CHECK-NEXT:   .byte 2
; CHECK-NEXT:   .byte 1
; CHECK-LABEL: ctz_v8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.8b, v0.8b, #7
; CHECK-NEXT:    adrp x8, .LCPI0_0
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 0)
  ret i8 %res
}

; ctz_v16i1: calls llvm.experimental.cttz.elts on a <16 x i1> mask with the
; second operand set to i1 0, and returns the result as i32.
;
; The expected lowering follows the same table/AND/umaxv/subtract shape as the
; 8-lane case, but on a full 128-bit vector: the table holds 16..1, the table
; is loaded with a q-register load, and the subtraction is from 16. The final
; `and ..., #0xff` keeps only the low 8 bits of the difference before it is
; returned in w0.
define i32 @ctz_v16i1(<16 x i1> %a) {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT:   .byte 16
; CHECK-NEXT:   .byte 15
; CHECK-NEXT:   .byte 14
; CHECK-NEXT:   .byte 13
; CHECK-NEXT:   .byte 12
; CHECK-NEXT:   .byte 11
; CHECK-NEXT:   .byte 10
; CHECK-NEXT:   .byte 9
; CHECK-NEXT:   .byte 8
; CHECK-NEXT:   .byte 7
; CHECK-NEXT:   .byte 6
; CHECK-NEXT:   .byte 5
; CHECK-NEXT:   .byte 4
; CHECK-NEXT:   .byte 3
; CHECK-NEXT:   .byte 2
; CHECK-NEXT:   .byte 1
; CHECK-LABEL: ctz_v16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.16b, v0.16b, #7
; CHECK-NEXT:    adrp x8, .LCPI1_0
; CHECK-NEXT:    mov w9, #16 // =0x10
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    sub w8, w9, w8
; CHECK-NEXT:    and w0, w8, #0xff
; CHECK-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
  ret i32 %res
}

; ctz_v4i32: calls llvm.experimental.cttz.elts on a <4 x i32> vector (not an
; i1 mask) with the second operand set to i1 0, and returns the result as i16.
;
; In the expected lowering, cmtst of the input with itself produces an
; all-ones lane for every non-zero element, xtn narrows the lanes to 16 bits,
; and the result is ANDed with a halfword table holding 4..1. umaxv then picks
; the largest surviving entry, which is subtracted from 4. The final
; `and ..., #0xff` keeps only the low 8 bits of the difference.
define i16 @ctz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT:   .hword 4
; CHECK-NEXT:   .hword 3
; CHECK-NEXT:   .hword 2
; CHECK-NEXT:   .hword 1
; CHECK-LABEL: ctz_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmtst v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    adrp x8, .LCPI2_0
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    sub w8, w9, w8
; CHECK-NEXT:    and w0, w8, #0xff
; CHECK-NEXT:    ret
  %res = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 0)
  ret i16 %res
}

; ctz_i7_v8i1: same <8 x i1> input and i1 0 flag as ctz_v8i1, but the
; intrinsic is instantiated with an i7 result, i.e. a return width that is
; not a power of two.
;
; The expected instruction sequence is the same as for the i8 variant apart
; from the constant-pool label: table of 8..1, shl/cmlt to build the lane
; mask, AND, umaxv, then subtract from 8.
define i7 @ctz_i7_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI3_0:
; CHECK-NEXT:   .byte 8
; CHECK-NEXT:   .byte 7
; CHECK-NEXT:   .byte 6
; CHECK-NEXT:   .byte 5
; CHECK-NEXT:   .byte 4
; CHECK-NEXT:   .byte 3
; CHECK-NEXT:   .byte 2
; CHECK-NEXT:   .byte 1
; CHECK-LABEL: ctz_i7_v8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.8b, v0.8b, #7
; CHECK-NEXT:    adrp x8, .LCPI3_0
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %res = call i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1> %a, i1 0)
  ret i7 %res
}

; ZERO IS POISON

; ctz_v8i1_poison: same <8 x i1> input and i8 result as ctz_v8i1, but the
; second intrinsic operand is i1 1 instead of i1 0 (the "zero is poison"
; variant named by the section heading above).
;
; The expected instruction sequence is the same as for ctz_v8i1 apart from
; the constant-pool label, so this test pins that the flag does not change
; the generated code here.
define i8 @ctz_v8i1_poison(<8 x i1> %a) {
; CHECK-LABEL: .LCPI4_0:
; CHECK-NEXT:   .byte 8
; CHECK-NEXT:   .byte 7
; CHECK-NEXT:   .byte 6
; CHECK-NEXT:   .byte 5
; CHECK-NEXT:   .byte 4
; CHECK-NEXT:   .byte 3
; CHECK-NEXT:   .byte 2
; CHECK-NEXT:   .byte 1
; CHECK-LABEL: ctz_v8i1_poison:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.8b, v0.8b, #7
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 1)
  ret i8 %res
}

; Declarations of the llvm.experimental.cttz.elts overloads exercised above.
; The mangled suffix encodes the result type followed by the source vector
; type; the trailing i1 parameter is the flag passed as 0 or 1 by the tests.
declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1>, i1)
declare i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)