; llvm/llvm/test/CodeGen/AArch64/faddp-half.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16

; Adds lane 1 of %a to lane 0 (the shuffle moves lane 1 down to lane 0) and
; returns lane 0 of the sum. The +fullfp16 run expects a single scalar
; pairwise "faddp h0, v0.2h"; the run without fullfp16 expects the add to be
; carried out in single precision (fcvtl, fadd, fcvtn).
; The unused shuffle operand and the unused mask lane are poison rather than
; the deprecated undef; neither feeds the lane that is returned.
define half @faddp_2xhalf(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x half> %a, %shift
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Commuted form of the <2 x half> lane0 + lane1 reduction: the shuffled
; vector is the first fadd operand and %a the second. The +fullfp16 run still
; expects a single "faddp h0, v0.2h"; the run without fullfp16 expects the
; single-precision sequence with the fadd source registers swapped.
; The unused shuffle operand and the unused mask lane are poison rather than
; the deprecated undef; neither feeds the lane that is returned.
define half @faddp_2xhalf_commute(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x half> %shift, %a
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Lane0 + lane1 reduction on a <4 x half> input: the shuffle moves lane 1 to
; lane 0, the vectors are added, and lane 0 is returned. The +fullfp16 run
; expects a single "faddp h0, v0.2h"; the run without fullfp16 expects the
; add to be carried out in single precision (fcvtl, fadd, fcvtn).
; The unused shuffle operand and the unused mask lanes are poison rather than
; the deprecated undef; none of them feeds the lane that is returned.
define half @faddp_4xhalf(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = fadd <4 x half> %a, %shift
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Commuted form of the <4 x half> lane0 + lane1 reduction: the shuffled
; vector is the first fadd operand and %a the second. The +fullfp16 run still
; expects a single "faddp h0, v0.2h"; the run without fullfp16 expects the
; single-precision sequence with the fadd source registers swapped.
; The unused shuffle operand and the unused mask lanes are poison rather than
; the deprecated undef; none of them feeds the lane that is returned.
define half @faddp_4xhalf_commute(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = fadd <4 x half> %shift, %a
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Lane0 + lane1 reduction on an <8 x half> input: the shuffle moves lane 1 to
; lane 0, the vectors are added, and lane 0 is returned. The +fullfp16 run
; expects a single "faddp h0, v0.2h"; the run without fullfp16 expects both
; halves of the vector to be widened (fcvtl/fcvtl2), added in single
; precision, and narrowed again (fcvtn/fcvtn2).
; The unused shuffle operand and the unused mask lanes are poison rather than
; the deprecated undef; none of them feeds the lane that is returned.
define half @faddp_8xhalf(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v3.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %0 = fadd <8 x half> %a, %shift
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Commuted form of the <8 x half> lane0 + lane1 reduction: the shuffled
; vector is the first fadd operand and %a the second. The +fullfp16 run still
; expects a single "faddp h0, v0.2h"; the run without fullfp16 expects the
; widen / add / narrow sequence with the fadd source registers swapped.
; The unused shuffle operand and the unused mask lanes are poison rather than
; the deprecated undef; none of them feeds the lane that is returned.
define half @faddp_8xhalf_commute(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %0 = fadd <8 x half> %shift, %a
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Full-width pairwise add on <8 x half>: every adjacent lane pair of %a is
; swapped by the shuffle and the swapped vector is added to %a with the
; reassoc fast-math flag, so both lanes of a pair hold that pair's sum.
; The +fullfp16 run expects "rev32" followed by a half-precision vector fadd;
; the run without fullfp16 expects the same rev32 followed by a widen
; (fcvtl/fcvtl2), single-precision add, and narrow (fcvtn/fcvtn2).
define <8 x half> @addp_v8f16(<8 x half> %a) {
; CHECK-LABEL: addp_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev32 v1.8h, v0.8h
; CHECK-NEXT:    fadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: addp_v8f16:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    rev32 v1.8h, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    ret
entry:
  %swapped = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %sum = fadd reassoc <8 x half> %swapped, %a
  ret <8 x half> %sum
}

; Pairwise add on <16 x half> (two 128-bit registers): every adjacent lane
; pair of %a is swapped by the shuffle and the swapped vector is added to %a
; with the reassoc fast-math flag.
; The +fullfp16 run expects one vector "faddp" across both input registers
; followed by zip1/zip2 to duplicate each pair sum into both lanes of its
; pair; the run without fullfp16 expects rev32 on each register followed by
; a widen (fcvtl/fcvtl2), single-precision adds, and narrow (fcvtn/fcvtn2).
define <16 x half> @addp_v16f16(<16 x half> %a) {
; CHECK-LABEL: addp_v16f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    zip1 v0.8h, v1.8h, v1.8h
; CHECK-NEXT:    zip2 v1.8h, v1.8h, v1.8h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: addp_v16f16:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    rev32 v2.8h, v0.8h
; CHECKNOFP16-NEXT:    rev32 v3.8h, v1.8h
; CHECKNOFP16-NEXT:    fcvtl v4.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v6.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fcvtl v5.4s, v2.4h
; CHECKNOFP16-NEXT:    fcvtl v7.4s, v3.4h
; CHECKNOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
; CHECKNOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
; CHECKNOFP16-NEXT:    fadd v4.4s, v5.4s, v4.4s
; CHECKNOFP16-NEXT:    fadd v5.4s, v7.4s, v6.4s
; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v0.4s
; CHECKNOFP16-NEXT:    fadd v3.4s, v3.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v4.4s
; CHECKNOFP16-NEXT:    fcvtn v1.4h, v5.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v1.8h, v3.4s
; CHECKNOFP16-NEXT:    ret
entry:
  %swapped = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %sum = fadd reassoc <16 x half> %swapped, %a
  ret <16 x half> %sum
}