; llvm/llvm/test/CodeGen/AArch64/abd-combine.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Unsigned absolute difference spelled as trunc(abs(zext(a) - zext(b))).
; The expected output is a single uabd on the original 8h vectors.
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %wide1 = zext <8 x i16> %src1 to <8 x i32>
  %wide2 = zext <8 x i16> %src2 to <8 x i32>
  %diff = sub <8 x i32> %wide1, %wide2
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Same widened pattern as the base case, but the subtrahend is a constant
; splat of 1 with no zext of its own. The expected output keeps the widened
; ushll/sub/abs/uzp1 sequence instead of forming a uabd.
define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %wide = zext <8 x i16> %src1 to <8 x i32>
  %diff = sub <8 x i32> %wide, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Constant splat of 1 on the left-hand side of the widened subtraction.
; The expected output uses usubw/usubw2 followed by abs and uzp1; no uabd
; is formed.
define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %wide = zext <8 x i16> %src1 to <8 x i32>
  %diff = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %wide
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Widened pattern with an all-zero left-hand side, i.e. abs(0 - zext(x)).
; The expected output still materialises the zero vector and performs the
; widened negate/subtract before abs and uzp1.
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %wide = zext <8 x i16> %src1 to <8 x i32>
  %negated = sub <8 x i32> zeroinitializer, %wide
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %negated, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Both operands are constant splats (3 and 1) and no extension is involved.
; The whole expression folds to a splat of 2.
define <8 x i16> @abdu_const_both() {
; CHECK-LABEL: abdu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %diff = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Both operands are zero-extended constants near the top of the unsigned i16
; range (65534 and 65535). Their absolute difference is 1, and the expected
; output is a splat of 1.
define <8 x i16> @abdu_const_bothhigh() {
; CHECK-LABEL: abdu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %wide1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %wide2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %diff = sub <8 x i32> %wide1, %wide2
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Second operand is a zero-extended undef vector. The expected output
; contains no instruction other than the return.
define <8 x i16> @abdu_undef(<8 x i16> %src1) {
; CHECK-LABEL: abdu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %wide1 = zext <8 x i16> %src1 to <8 x i32>
  %wide_undef = zext <8 x i16> undef to <8 x i32>
  %diff = sub <8 x i32> %wide1, %wide_undef
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Select form of unsigned absolute difference: (a >u b) ? a - b : b - a.
; The expected output is a single uabd.
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_gt = icmp ugt <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_gt, <8 x i16> %a_sub_b, <8 x i16> %b_sub_a
  ret <8 x i16> %abd
}

; Select form of unsigned absolute difference: (a >=u b) ? a - b : b - a.
; The expected output is a single uabd.
define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_ge = icmp uge <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_ge, <8 x i16> %a_sub_b, <8 x i16> %b_sub_a
  ret <8 x i16> %abd
}

; Select form of unsigned absolute difference: (a <u b) ? b - a : a - b.
; The expected output is a single uabd.
define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ult:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_lt = icmp ult <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_lt, <8 x i16> %b_sub_a, <8 x i16> %a_sub_b
  ret <8 x i16> %abd
}

; Select form of unsigned absolute difference: (a <=u b) ? b - a : a - b.
; The expected output is a single uabd.
define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ule:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_le = icmp ule <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_le, <8 x i16> %b_sub_a, <8 x i16> %a_sub_b
  ret <8 x i16> %abd
}

; Select form of signed absolute difference: (a >s b) ? a - b : b - a.
; The expected output is a single sabd.
define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sgt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_gt = icmp sgt <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_gt, <8 x i16> %a_sub_b, <8 x i16> %b_sub_a
  ret <8 x i16> %abd
}

; Select form of signed absolute difference: (a >=s b) ? a - b : b - a.
; The expected output is a single sabd.
define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_ge = icmp sge <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_ge, <8 x i16> %a_sub_b, <8 x i16> %b_sub_a
  ret <8 x i16> %abd
}

; Select form of signed absolute difference: (a <s b) ? b - a : a - b.
; The expected output is a single sabd.
define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_slt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_lt = icmp slt <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_lt, <8 x i16> %b_sub_a, <8 x i16> %a_sub_b
  ret <8 x i16> %abd
}

; Select form of signed absolute difference: (a <=s b) ? b - a : a - b.
; The expected output is a single sabd.
define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %is_le = icmp sle <8 x i16> %0, %1
  %a_sub_b = sub <8 x i16> %0, %1
  %b_sub_a = sub <8 x i16> %1, %0
  %abd = select <8 x i1> %is_le, <8 x i16> %b_sub_a, <8 x i16> %a_sub_b
  ret <8 x i16> %abd
}


; Direct call to the NEON uabd intrinsic with two variable operands.
; The expected output is a single uabd.
define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %abd
}

; uabd intrinsic with a constant splat of 1 as the second operand.
; The expected output materialises the constant and issues one uabd.
define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; uabd intrinsic with a constant splat of 1 as the first operand.
; In the expected output the constant appears as the second uabd operand,
; giving the same code as the constant-on-the-right case.
define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; uabd intrinsic with an all-zero first operand. The unsigned absolute
; difference against zero is the other operand itself, so the expected
; output only moves %src1 (arriving in v1, after the unused leading %t
; argument) into the return register.
define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> zeroinitializer, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; uabd intrinsic on two constant splats (3 and 1); folds to a splat of 2.
define <8 x i16> @abdu_i_const_both() {
; CHECK-LABEL: abdu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; uabd intrinsic on two constants near the top of the unsigned i16 range
; (65534 and 65535); folds to a splat of 1.
define <8 x i16> @abdu_i_const_bothhigh() {
; CHECK-LABEL: abdu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %abd
}

; uabd intrinsic on constants 32766 and 1; folds to a splat of
; 32765 (0x7ffd), materialised through a general-purpose register.
define <8 x i16> @abdu_i_const_onehigh() {
; CHECK-LABEL: abdu_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; uabd intrinsic on constants 32766 and 65534. The unsigned difference is
; 32768, which the expected output materialises as #128 shifted left by 8.
define <8 x i16> @abdu_i_const_oneneg() {
; CHECK-LABEL: abdu_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %abd
}

; uabd intrinsic with an all-zero first operand and an unused leading
; vector argument %t. The expected output just copies %src1 (v1) into the
; return register.
define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> zeroinitializer, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; uabd intrinsic with an undef first operand. The expected output returns
; an all-zero vector.
define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; Two chained uabd intrinsic calls, each against a constant (3, then 1).
; The expected output keeps both uabd instructions; the constants are not
; merged.
define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %inner = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %outer = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %inner, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %outer
}





; Signed absolute difference spelled as trunc(abs(sext(a) - sext(b))).
; The expected output is a single sabd on the original 8h vectors.
; The widened values are named after the sext that produces them.
define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %sub = sub <8 x i32> %sextsrc1, %sextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Sign-extended operand minus a constant splat of 1, then abs and trunc.
; The expected output keeps the widened sshll/sub/abs/uzp1 sequence instead
; of forming an sabd.
; The widened value is named after the sext that produces it.
define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Constant splat of 1 minus a sign-extended operand, then abs and trunc.
; The expected output uses ssubw/ssubw2 followed by abs and uzp1; no sabd
; is formed.
; The widened value is named after the sext that produces it.
define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %sextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; abs(0 - sext(x)) truncated back to i16. The expected output still
; materialises the zero vector and performs the widened negate/subtract
; before abs and uzp1.
; The widened value is named after the sext that produces it.
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> zeroinitializer, %sextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Both operands are constant splats (3 and 1); folds to a splat of 2.
; NOTE(review): the body contains no sext, so the IR is the same as
; abdu_const_both and nothing here is specific to the signed case.
define <8 x i16> @abds_const_both() {
; CHECK-LABEL: abds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %diff = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %absdiff = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %diff, i1 false)
  %narrow = trunc <8 x i32> %absdiff to <8 x i16>
  ret <8 x i16> %narrow
}

; Both operands are sign-extended constants whose i16 bit patterns are
; 65534 and 65535, i.e. -2 and -1 once sign-extended. Their absolute
; difference is 1, and the expected output is a splat of 1.
; The widened values are named after the sext that produces them.
define <8 x i16> @abds_const_bothhigh() {
; CHECK-LABEL: abds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %sextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %sub = sub <8 x i32> %sextsrc1, %sextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Second operand is a sign-extended undef vector. In the expected output
; the subtraction is gone and only the widened abs of %src1 remains
; (sshll/sshll2, abs, uzp1).
; The widened values are named after the sext that produces them.
define <8 x i16> @abds_undef(<8 x i16> %src1) {
; CHECK-LABEL: abds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sextsrc2 = sext <8 x i16> undef to <8 x i32>
  %sub = sub <8 x i32> %sextsrc1, %sextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}



; Direct call to the NEON sabd intrinsic with two variable operands.
; The expected output is a single sabd.
define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %abd
}

; sabd intrinsic with a constant splat of 1 as the second operand.
; The expected output materialises the constant and issues one sabd.
define <8 x i16> @abds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; sabd intrinsic with a constant splat of 1 as the first operand.
; In the expected output the constant appears as the second sabd operand,
; giving the same code as the constant-on-the-right case.
define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; sabd intrinsic with an all-zero first operand. The expected output is a
; plain vector abs of %src1.
define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v0.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> zeroinitializer, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; sabd intrinsic on two constant splats (3 and 1); folds to a splat of 2.
define <8 x i16> @abds_i_const_both() {
; CHECK-LABEL: abds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; sabd intrinsic on two constants near the top of the signed i16 range
; (32766 and 32767); folds to a splat of 1.
define <8 x i16> @abds_i_const_bothhigh() {
; CHECK-LABEL: abds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %abd
}

; sabd intrinsic on constants 32766 and 1; folds to a splat of
; 32765 (0x7ffd), materialised through a general-purpose register.
define <8 x i16> @abds_i_const_onehigh() {
; CHECK-LABEL: abds_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %abd
}

; sabd intrinsic on constants 32766 and 65534 (the i16 bit pattern of -2).
; The signed difference is 32768, which the expected output materialises
; in each 16-bit lane as #128 shifted left by 8 (0x8000).
define <8 x i16> @abds_i_const_oneneg() {
; CHECK-LABEL: abds_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %abd
}

; sabd intrinsic with an all-zero first operand and an unused leading
; vector argument %t. The expected output is a vector abs reading %src1
; from v1.
define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v1.8h
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> zeroinitializer, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; sabd intrinsic with an undef first operand. The expected output returns
; an all-zero vector.
define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %abd = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %abd
}

; Two chained sabd intrinsic calls, each against a constant (3, then 1).
; The expected output keeps both sabd instructions; the constants are not
; merged.
define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %inner = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %outer = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %inner, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %outer
}

; All-constant chain: umax(0, -1) feeds a uabd against a splat of 1, giving
; 254 in every byte lane (the movi #254 in the expected output). That value
; is then used twice: zero-extended, bitcast and passed to saddlp, and
; splatted from lane 0 via shufflevector; the two results are OR'd together.
; NOTE(review): presumably a regression test for folding an ABD node whose
; operand is itself a foldable constant expression -- confirm against the
; commit that added it.
define <1 x i64> @recursive() {
; CHECK-LABEL: recursive:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8b, #254
; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    saddlp v1.1d, v1.2s
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  %3 = zext <8 x i8> %2 to <8 x i16>
  %4 = bitcast <8 x i16> %3 to <4 x i32>
  %5 = shufflevector <4 x i32> %4, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %6 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %7 = bitcast <16 x i8> %6 to <2 x i64>
  %8 = shufflevector <2 x i64> %7, <2 x i64> zeroinitializer, <1 x i32> zeroinitializer
  %9 = tail call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %5)
  %10 = or <1 x i64> %8, %9
  ret <1 x i64> %10
}

; Intrinsic declarations used by the test functions in this file: the
; AArch64 NEON umax/saddlp/uabd/sabd intrinsics and the generic vector abs.
declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)