; llvm/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE

;;
;; Vectors of i8
;;
;; Add-reduction of a <2 x i8> argument through @llvm.vector.reduce.add.v2i8;
;; the scalar result is returned as a plain i8 (no signext/zeroext attribute).
;; Expected code: one vspltb + vaddubm step, then a byte extract with the
;; index register r3 preloaded to 0. The little-endian runs expect splat
;; index 14 and vextubrx; the big-endian runs expect index 1 and vextublx.
define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i8:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i8:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i8:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i8:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a)
  ret i8 %0
}

;; Add-reduction of a <4 x i8> argument through @llvm.vector.reduce.add.v4i8.
;; Expected code: two splat + vaddubm steps (vsplth, then vspltb) followed by
;; a byte extract. Splat indices and the extract instruction differ between
;; the little-endian runs (6, 14, vextubrx) and the big-endian runs
;; (1, 1, vextublx).
define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i8:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i8:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i8:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i8:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a)
  ret i8 %0
}

;; Add-reduction of an <8 x i8> argument through @llvm.vector.reduce.add.v8i8.
;; Expected code: three splat + vaddubm steps (xxspltw, vsplth, vspltb)
;; followed by a byte extract. The little-endian runs expect splat indices
;; 2, 6, 14 and vextubrx; the big-endian runs expect 1, 1, 1 and vextublx.
define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i8:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i8:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i8:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i8:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
  ret i8 %0
}

;; Add-reduction of a <16 x i8> argument through
;; @llvm.vector.reduce.add.v16i8, with the i8 result returned as signext.
;; Expected code: four shuffle + vaddubm steps (xxswapd, xxspltw, vsplth,
;; vspltb), a byte extract (vextubrx on the little-endian runs, vextublx on
;; the big-endian runs), and a final extsb on r3 before the return.
define dso_local signext i8 @v16i8_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8_sign:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    extsb r3, r3
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8_sign:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    extsb r3, r3
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8_sign:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    extsb r3, r3
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8_sign:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    extsb r3, r3
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %0
}

;; Same <16 x i8> add-reduction as @v16i8_sign's IR body, but the i8 result
;; is returned as zeroext. The expected reduction sequence is unchanged
;; (xxswapd, xxspltw, vsplth, vspltb, each followed by vaddubm, then a byte
;; extract); the final instruction before the return is clrldi r3, r3, 56
;; instead of a sign extension.
define dso_local zeroext i8 @v16i8_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8_zero:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    clrldi r3, r3, 56
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8_zero:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    clrldi r3, r3, 56
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8_zero:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    clrldi r3, r3, 56
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8_zero:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    clrldi r3, r3, 56
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %0
}

;; Add-reduction of a <32 x i8> argument through
;; @llvm.vector.reduce.add.v32i8.
;; Expected code: an initial vaddubm of v2 and v3 (presumably the two
;; 16-byte halves of the argument -- confirm against the calling convention),
;; then the same four shuffle + vaddubm steps and byte extract that the
;; <16 x i8> tests in this file expect.
define dso_local i8 @v32i8(<32 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32i8:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vspltb v3, v2, 14
; PWR9LE-NEXT:    vaddubm v2, v2, v3
; PWR9LE-NEXT:    vextubrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v32i8:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vspltb v3, v2, 1
; PWR9BE-NEXT:    vaddubm v2, v2, v3
; PWR9BE-NEXT:    vextublx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v32i8:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vspltb v3, v2, 14
; PWR10LE-NEXT:    vaddubm v2, v2, v3
; PWR10LE-NEXT:    vextubrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v32i8:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vspltb v3, v2, 1
; PWR10BE-NEXT:    vaddubm v2, v2, v3
; PWR10BE-NEXT:    vextublx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
  ret i8 %0
}

;; Declarations of the i8 add-reduction intrinsics called by the i8 tests
;; above, one per tested vector width (2, 4, 8, 16 and 32 elements).
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) #0
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) #0
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) #0
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #0
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) #0

;;
;; Vectors of i16
;;
;; Add-reduction of a <2 x i16> argument through
;; @llvm.vector.reduce.add.v2i16.
;; Expected code: one vsplth + vadduhm step, then a halfword extract with the
;; index register r3 preloaded to 0. The little-endian runs expect splat
;; index 6 and vextuhrx; the big-endian runs expect index 1 and vextuhlx.
define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i16:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i16:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i16:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i16:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a)
  ret i16 %0
}

;; Add-reduction of a <4 x i16> argument through
;; @llvm.vector.reduce.add.v4i16.
;; Expected code: two splat + vadduhm steps (xxspltw, then vsplth) followed
;; by a halfword extract. The little-endian runs expect splat indices 2, 6
;; and vextuhrx; the big-endian runs expect 1, 1 and vextuhlx.
define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i16:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i16:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i16:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i16:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
  ret i16 %0
}

;; Add-reduction of an <8 x i16> argument through
;; @llvm.vector.reduce.add.v8i16.
;; Expected code: three shuffle + vadduhm steps (xxswapd, xxspltw, vsplth)
;; followed by a halfword extract (vextuhrx on the little-endian runs,
;; vextuhlx on the big-endian runs).
define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i16:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i16:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i16:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i16:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
  ret i16 %0
}

;; Add-reduction of a <16 x i16> argument through
;; @llvm.vector.reduce.add.v16i16, with the i16 result returned as zeroext.
;; Expected code: an initial vadduhm of v2 and v3 (presumably the two
;; 8-halfword halves of the argument -- confirm against the calling
;; convention), three shuffle + vadduhm steps, a halfword extract, and a
;; final clrldi r3, r3, 48 before the return.
define dso_local zeroext i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i16:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    clrldi r3, r3, 48
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i16:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    clrldi r3, r3, 48
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i16:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    clrldi r3, r3, 48
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i16:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    clrldi r3, r3, 48
; PWR10BE-NEXT:    blr
entry:
  %0 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
  ret i16 %0
}

;; Sign-extends a <16 x i8> argument to <16 x i16>, add-reduces it through
;; @llvm.vector.reduce.add.v16i16, and returns the i16 result as signext.
;; Expected code: vmrghb/vmrglb of v2 with itself, then vslh + vsrah by the
;; vector in v4, then the halfword reduction, extract and a final extsh.
;; The pwr9 runs expect v4 from "vspltish v4, 8"; the pwr10 runs expect
;; "xxspltiw v4, 524296" (524296 == 0x00080008) and a different position for
;; the "li r3, 0".
define dso_local signext i16 @v16i8tov16i16_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i16_sign:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vmrghb v3, v2, v2
; PWR9LE-NEXT:    vspltish v4, 8
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmrglb v2, v2, v2
; PWR9LE-NEXT:    vslh v3, v3, v4
; PWR9LE-NEXT:    vslh v2, v2, v4
; PWR9LE-NEXT:    vsrah v3, v3, v4
; PWR9LE-NEXT:    vsrah v2, v2, v4
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    extsh r3, r3
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i16_sign:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vmrglb v3, v2, v2
; PWR9BE-NEXT:    vspltish v4, 8
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmrghb v2, v2, v2
; PWR9BE-NEXT:    vslh v3, v3, v4
; PWR9BE-NEXT:    vslh v2, v2, v4
; PWR9BE-NEXT:    vsrah v3, v3, v4
; PWR9BE-NEXT:    vsrah v2, v2, v4
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    extsh r3, r3
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i16_sign:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vmrghb v3, v2, v2
; PWR10LE-NEXT:    xxspltiw v4, 524296
; PWR10LE-NEXT:    vmrglb v2, v2, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vslh v3, v3, v4
; PWR10LE-NEXT:    vslh v2, v2, v4
; PWR10LE-NEXT:    vsrah v3, v3, v4
; PWR10LE-NEXT:    vsrah v2, v2, v4
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    extsh r3, r3
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i16_sign:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vmrglb v3, v2, v2
; PWR10BE-NEXT:    xxspltiw v4, 524296
; PWR10BE-NEXT:    vmrghb v2, v2, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vslh v3, v3, v4
; PWR10BE-NEXT:    vslh v2, v2, v4
; PWR10BE-NEXT:    vsrah v3, v3, v4
; PWR10BE-NEXT:    vsrah v2, v2, v4
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    extsh r3, r3
; PWR10BE-NEXT:    blr
entry:
  %0 = sext <16 x i8> %a to <16 x i16>
  %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
  ret i16 %1
}

;; Zero-extends a <16 x i8> argument to <16 x i16>, add-reduces it through
;; @llvm.vector.reduce.add.v16i16, and returns the i16 result as zeroext.
;; Expected code: v3 is cleared with xxlxor and merged with v2 via
;; vmrghb/vmrglb, followed by the halfword reduction, extract and a final
;; clrldi r3, r3, 48. No shift pair is expected here, unlike the
;; sign-extending variant of this test.
define dso_local zeroext i16 @v16i8tov16i16_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i16_zero:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxlxor v3, v3, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vmrghb v4, v3, v2
; PWR9LE-NEXT:    vmrglb v2, v3, v2
; PWR9LE-NEXT:    vadduhm v2, v2, v4
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vsplth v3, v2, 6
; PWR9LE-NEXT:    vadduhm v2, v2, v3
; PWR9LE-NEXT:    vextuhrx r3, r3, v2
; PWR9LE-NEXT:    clrldi r3, r3, 48
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i16_zero:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxlxor v3, v3, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vmrglb v4, v3, v2
; PWR9BE-NEXT:    vmrghb v2, v3, v2
; PWR9BE-NEXT:    vadduhm v2, v2, v4
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vsplth v3, v2, 1
; PWR9BE-NEXT:    vadduhm v2, v2, v3
; PWR9BE-NEXT:    vextuhlx r3, r3, v2
; PWR9BE-NEXT:    clrldi r3, r3, 48
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i16_zero:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxlxor v3, v3, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vmrghb v4, v3, v2
; PWR10LE-NEXT:    vmrglb v2, v3, v2
; PWR10LE-NEXT:    vadduhm v2, v2, v4
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vsplth v3, v2, 6
; PWR10LE-NEXT:    vadduhm v2, v2, v3
; PWR10LE-NEXT:    vextuhrx r3, r3, v2
; PWR10LE-NEXT:    clrldi r3, r3, 48
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i16_zero:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxlxor v3, v3, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vmrglb v4, v3, v2
; PWR10BE-NEXT:    vmrghb v2, v3, v2
; PWR10BE-NEXT:    vadduhm v2, v2, v4
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vsplth v3, v2, 1
; PWR10BE-NEXT:    vadduhm v2, v2, v3
; PWR10BE-NEXT:    vextuhlx r3, r3, v2
; PWR10BE-NEXT:    clrldi r3, r3, 48
; PWR10BE-NEXT:    blr
entry:
  %0 = zext <16 x i8> %a to <16 x i16>
  %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
  ret i16 %1
}

;; Declarations of the i16 add-reduction intrinsics called by the i16 tests
;; above, one per tested vector width (2, 4, 8 and 16 elements).
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) #0
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #0
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0

;;
;; Vectors of i32
;;
;; Add-reduction of a <2 x i32> argument through
;; @llvm.vector.reduce.add.v2i32, with the i32 result returned as zeroext.
;; Expected code: one xxspltw + vadduwm step, then a word extract with the
;; index register r3 preloaded to 0. The little-endian runs expect splat
;; index 2 and vextuwrx; the big-endian runs expect index 1 and vextuwlx.
;; No separate clearing instruction is expected after the extract.
define dso_local zeroext i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
  ret i32 %0
}

;; Add-reduction of a <4 x i32> argument through
;; @llvm.vector.reduce.add.v4i32, with the i32 result returned as zeroext.
;; Expected code: two shuffle + vadduwm steps (xxswapd, then xxspltw)
;; followed by a word extract (vextuwrx on the little-endian runs, vextuwlx
;; on the big-endian runs).
define dso_local zeroext i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
  ret i32 %0
}

;; Add-reduction of an <8 x i32> argument through
;; @llvm.vector.reduce.add.v8i32, with the i32 result returned as zeroext.
;; Expected code: an initial vadduwm of v2 and v3 (presumably the two
;; 4-word halves of the argument -- confirm against the calling convention),
;; then the xxswapd/xxspltw + vadduwm steps and a word extract.
define dso_local zeroext i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
  ret i32 %0
}

;; Add-reduction of a <16 x i32> argument through
;; @llvm.vector.reduce.add.v16i32, with the i32 result returned as zeroext.
;; Expected code: v3+v5 and v2+v4 are added pairwise and then combined into
;; v2 (v2..v5 presumably hold the four quarters of the argument -- confirm
;; against the calling convention), followed by the xxswapd/xxspltw +
;; vadduwm steps and a word extract.
define dso_local zeroext i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vadduwm v3, v3, v5
; PWR9LE-NEXT:    vadduwm v2, v2, v4
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vadduwm v3, v3, v5
; PWR9BE-NEXT:    vadduwm v2, v2, v4
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vadduwm v3, v3, v5
; PWR10LE-NEXT:    vadduwm v2, v2, v4
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vadduwm v3, v3, v5
; PWR10BE-NEXT:    vadduwm v2, v2, v4
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
  ret i32 %0
}

;; Add-reduction of a <32 x i32> argument through
;; @llvm.vector.reduce.add.v32i32, with the i32 result returned as zeroext.
;; Expected code: registers v2..v9 are combined with a tree of vadduwm
;; instructions (v4+v8, v2+v6, v5+v9, v3+v7, then v3+v5, v2+v4, v2+v3),
;; followed by the xxswapd/xxspltw + vadduwm steps and a word extract.
;; The only difference between the pwr9 and pwr10 expectations is where the
;; "li r3, 0" is scheduled within the initial adds.
define dso_local zeroext i32 @v32i32(<32 x i32> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32i32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vadduwm v4, v4, v8
; PWR9LE-NEXT:    vadduwm v2, v2, v6
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vadduwm v5, v5, v9
; PWR9LE-NEXT:    vadduwm v3, v3, v7
; PWR9LE-NEXT:    vadduwm v3, v3, v5
; PWR9LE-NEXT:    vadduwm v2, v2, v4
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v32i32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vadduwm v4, v4, v8
; PWR9BE-NEXT:    vadduwm v2, v2, v6
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vadduwm v5, v5, v9
; PWR9BE-NEXT:    vadduwm v3, v3, v7
; PWR9BE-NEXT:    vadduwm v3, v3, v5
; PWR9BE-NEXT:    vadduwm v2, v2, v4
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v32i32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vadduwm v4, v4, v8
; PWR10LE-NEXT:    vadduwm v2, v2, v6
; PWR10LE-NEXT:    vadduwm v5, v5, v9
; PWR10LE-NEXT:    vadduwm v3, v3, v7
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vadduwm v3, v3, v5
; PWR10LE-NEXT:    vadduwm v2, v2, v4
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v32i32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vadduwm v4, v4, v8
; PWR10BE-NEXT:    vadduwm v2, v2, v6
; PWR10BE-NEXT:    vadduwm v5, v5, v9
; PWR10BE-NEXT:    vadduwm v3, v3, v7
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vadduwm v3, v3, v5
; PWR10BE-NEXT:    vadduwm v2, v2, v4
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a)
  ret i32 %0
}

;; Sign-extends all sixteen i8 lanes of %a to i32 and add-reduces the widened
;; vector to one i32, returned with the signext attribute.
;; The expected code widens in four quarters: each is a vperm whose control
;; vector is loaded from one of the .LCPI17_* constants, followed by vextsb2w.
;; The quarters are combined with vadduwm, folded inside the register with
;; xxswapd / xxspltw, and the scalar is extracted with vextuwrx (little-endian)
;; or vextuwlx (big-endian). A trailing extsw is expected in every run.
;; Only the pwr10 little-endian run loads the constants PC-relatively (plxv);
;; the other runs form the address with addis/addi from r2 and use lxv.
define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i32_sign:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
; PWR9LE-NEXT:    lxv v3, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
; PWR9LE-NEXT:    lxv v4, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
; PWR9LE-NEXT:    vperm v3, v2, v2, v3
; PWR9LE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
; PWR9LE-NEXT:    lxv v5, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
; PWR9LE-NEXT:    vextsb2w v3, v3
; PWR9LE-NEXT:    vperm v4, v2, v2, v4
; PWR9LE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
; PWR9LE-NEXT:    lxv v0, 0(r3)
; PWR9LE-NEXT:    vextsb2w v4, v4
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vperm v5, v2, v2, v5
; PWR9LE-NEXT:    vadduwm v3, v4, v3
; PWR9LE-NEXT:    vextsb2w v5, v5
; PWR9LE-NEXT:    vperm v2, v2, v2, v0
; PWR9LE-NEXT:    vextsb2w v2, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v5
; PWR9LE-NEXT:    vadduwm v2, v3, v2
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    extsw r3, r3
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i32_sign:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
; PWR9BE-NEXT:    lxv v3, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
; PWR9BE-NEXT:    lxv v4, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
; PWR9BE-NEXT:    vperm v3, v2, v2, v3
; PWR9BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
; PWR9BE-NEXT:    lxv v5, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
; PWR9BE-NEXT:    vextsb2w v3, v3
; PWR9BE-NEXT:    vperm v4, v2, v2, v4
; PWR9BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
; PWR9BE-NEXT:    lxv v0, 0(r3)
; PWR9BE-NEXT:    vextsb2w v4, v4
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vperm v5, v2, v2, v5
; PWR9BE-NEXT:    vadduwm v3, v4, v3
; PWR9BE-NEXT:    vextsb2w v5, v5
; PWR9BE-NEXT:    vperm v2, v2, v2, v0
; PWR9BE-NEXT:    vextsb2w v2, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v5
; PWR9BE-NEXT:    vadduwm v2, v3, v2
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    extsw r3, r3
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i32_sign:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    plxv v3, .LCPI17_0@PCREL(0), 1
; PWR10LE-NEXT:    plxv v4, .LCPI17_1@PCREL(0), 1
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vperm v3, v2, v2, v3
; PWR10LE-NEXT:    plxv v5, .LCPI17_2@PCREL(0), 1
; PWR10LE-NEXT:    plxv v0, .LCPI17_3@PCREL(0), 1
; PWR10LE-NEXT:    vperm v4, v2, v2, v4
; PWR10LE-NEXT:    vperm v5, v2, v2, v5
; PWR10LE-NEXT:    vperm v2, v2, v2, v0
; PWR10LE-NEXT:    vextsb2w v3, v3
; PWR10LE-NEXT:    vextsb2w v4, v4
; PWR10LE-NEXT:    vextsb2w v5, v5
; PWR10LE-NEXT:    vextsb2w v2, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v5
; PWR10LE-NEXT:    vadduwm v3, v4, v3
; PWR10LE-NEXT:    vadduwm v2, v3, v2
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    extsw r3, r3
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i32_sign:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
; PWR10BE-NEXT:    lxv v3, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
; PWR10BE-NEXT:    lxv v4, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
; PWR10BE-NEXT:    vperm v3, v2, v2, v3
; PWR10BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
; PWR10BE-NEXT:    vextsb2w v3, v3
; PWR10BE-NEXT:    lxv v5, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
; PWR10BE-NEXT:    vperm v4, v2, v2, v4
; PWR10BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
; PWR10BE-NEXT:    vextsb2w v4, v4
; PWR10BE-NEXT:    lxv v0, 0(r3)
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vperm v5, v2, v2, v5
; PWR10BE-NEXT:    vadduwm v3, v4, v3
; PWR10BE-NEXT:    vextsb2w v5, v5
; PWR10BE-NEXT:    vperm v2, v2, v2, v0
; PWR10BE-NEXT:    vextsb2w v2, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v5
; PWR10BE-NEXT:    vadduwm v2, v3, v2
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    extsw r3, r3
; PWR10BE-NEXT:    blr
entry:
  %0 = sext <16 x i8> %a to <16 x i32>
  %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
  ret i32 %1
}

;; Zero-extends all sixteen i8 lanes of %a to i32 and add-reduces the widened
;; vector to one i32, returned with the zeroext attribute.
;; The expected code has no separate extend instruction: each of the four
;; quarters is a single vperm that takes a zeroed register (xxlxor v4, v4, v4)
;; and the input, with its control vector loaded from a .LCPI18_* constant.
;; The quarters are then summed with vadduwm, folded with xxswapd / xxspltw,
;; and extracted with vextuwrx (little-endian) or vextuwlx (big-endian).
;; No extsw is expected before the return in any run.
define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i32_zero:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; PWR9LE-NEXT:    xxlxor v4, v4, v4
; PWR9LE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
; PWR9LE-NEXT:    lxv v3, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
; PWR9LE-NEXT:    lxv v5, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
; PWR9LE-NEXT:    vperm v3, v4, v2, v3
; PWR9LE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
; PWR9LE-NEXT:    lxv v0, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
; PWR9LE-NEXT:    vperm v5, v4, v2, v5
; PWR9LE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
; PWR9LE-NEXT:    lxv v1, 0(r3)
; PWR9LE-NEXT:    vadduwm v3, v5, v3
; PWR9LE-NEXT:    li r3, 0
; PWR9LE-NEXT:    vperm v0, v4, v2, v0
; PWR9LE-NEXT:    vperm v2, v4, v2, v1
; PWR9LE-NEXT:    vadduwm v2, v2, v0
; PWR9LE-NEXT:    vadduwm v2, v3, v2
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    xxspltw v3, v2, 2
; PWR9LE-NEXT:    vadduwm v2, v2, v3
; PWR9LE-NEXT:    vextuwrx r3, r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i32_zero:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; PWR9BE-NEXT:    xxlxor v4, v4, v4
; PWR9BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
; PWR9BE-NEXT:    lxv v3, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
; PWR9BE-NEXT:    lxv v5, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
; PWR9BE-NEXT:    vperm v3, v4, v2, v3
; PWR9BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
; PWR9BE-NEXT:    lxv v0, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
; PWR9BE-NEXT:    vperm v5, v4, v2, v5
; PWR9BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
; PWR9BE-NEXT:    lxv v1, 0(r3)
; PWR9BE-NEXT:    vadduwm v3, v5, v3
; PWR9BE-NEXT:    li r3, 0
; PWR9BE-NEXT:    vperm v0, v4, v2, v0
; PWR9BE-NEXT:    vperm v2, v4, v2, v1
; PWR9BE-NEXT:    vadduwm v2, v2, v0
; PWR9BE-NEXT:    vadduwm v2, v3, v2
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    xxspltw v3, v2, 1
; PWR9BE-NEXT:    vadduwm v2, v2, v3
; PWR9BE-NEXT:    vextuwlx r3, r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i32_zero:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    plxv v3, .LCPI18_0@PCREL(0), 1
; PWR10LE-NEXT:    plxv v5, .LCPI18_1@PCREL(0), 1
; PWR10LE-NEXT:    xxlxor v4, v4, v4
; PWR10LE-NEXT:    li r3, 0
; PWR10LE-NEXT:    vperm v3, v4, v2, v3
; PWR10LE-NEXT:    plxv v0, .LCPI18_2@PCREL(0), 1
; PWR10LE-NEXT:    plxv v1, .LCPI18_3@PCREL(0), 1
; PWR10LE-NEXT:    vperm v5, v4, v2, v5
; PWR10LE-NEXT:    vperm v0, v4, v2, v0
; PWR10LE-NEXT:    vperm v2, v4, v2, v1
; PWR10LE-NEXT:    vadduwm v2, v2, v0
; PWR10LE-NEXT:    vadduwm v3, v5, v3
; PWR10LE-NEXT:    vadduwm v2, v3, v2
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    xxspltw v3, v2, 2
; PWR10LE-NEXT:    vadduwm v2, v2, v3
; PWR10LE-NEXT:    vextuwrx r3, r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i32_zero:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
; PWR10BE-NEXT:    xxlxor v4, v4, v4
; PWR10BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
; PWR10BE-NEXT:    lxv v3, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
; PWR10BE-NEXT:    lxv v5, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
; PWR10BE-NEXT:    vperm v3, v4, v2, v3
; PWR10BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
; PWR10BE-NEXT:    lxv v0, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
; PWR10BE-NEXT:    vperm v5, v4, v2, v5
; PWR10BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
; PWR10BE-NEXT:    vadduwm v3, v5, v3
; PWR10BE-NEXT:    lxv v1, 0(r3)
; PWR10BE-NEXT:    li r3, 0
; PWR10BE-NEXT:    vperm v0, v4, v2, v0
; PWR10BE-NEXT:    vperm v2, v4, v2, v1
; PWR10BE-NEXT:    vadduwm v2, v2, v0
; PWR10BE-NEXT:    vadduwm v2, v3, v2
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    xxspltw v3, v2, 1
; PWR10BE-NEXT:    vadduwm v2, v2, v3
; PWR10BE-NEXT:    vextuwlx r3, r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = zext <16 x i8> %a to <16 x i32>
  %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
  ret i32 %1
}

;; Declarations of the i32 add-reduction intrinsics, one per source vector
;; width (2, 4, 8, 16 and 32 elements). All carry attribute group #0.
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) #0
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #0
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #0
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #0
declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) #0

;;
;; Vectors of i64
;;
;; Add-reduces a <2 x i64> to a single i64.
;; Expected code in every run: xxswapd to exchange the two doublewords, one
;; vaddudm, then a move of the result into r3. The little-endian runs expect
;; mfvsrld for that move and the big-endian runs expect mfvsrd.
define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
  ret i64 %0
}

;; Add-reduces a <4 x i64> to a single i64.
;; The expected code first adds v2 and v3 with vaddudm, then finishes with
;; xxswapd + vaddudm and a move to r3: mfvsrld in the little-endian runs,
;; mfvsrd in the big-endian runs.
define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
  ret i64 %0
}

;; Add-reduces an <8 x i64> to a single i64.
;; The expected code sums v2..v5 pairwise with three vaddudm instructions,
;; then finishes with xxswapd + vaddudm and a move to r3: mfvsrld in the
;; little-endian runs, mfvsrd in the big-endian runs. All four runs expect the
;; same instruction order apart from that final move.
define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vaddudm v3, v3, v5
; PWR9LE-NEXT:    vaddudm v2, v2, v4
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vaddudm v3, v3, v5
; PWR9BE-NEXT:    vaddudm v2, v2, v4
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vaddudm v3, v3, v5
; PWR10LE-NEXT:    vaddudm v2, v2, v4
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vaddudm v3, v3, v5
; PWR10BE-NEXT:    vaddudm v2, v2, v4
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
  ret i64 %0
}

;; Add-reduces a <16 x i64> to a single i64.
;; The expected code sums v2..v9 with seven vaddudm instructions, then
;; finishes with xxswapd + vaddudm and a move to r3: mfvsrld in the
;; little-endian runs, mfvsrd in the big-endian runs.
;; The pwr9 and pwr10 runs expect the same set of adds but in a different
;; order (the v2/v6 add is second for pwr9 and fifth for pwr10).
define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    vaddudm v4, v4, v8
; PWR9LE-NEXT:    vaddudm v2, v2, v6
; PWR9LE-NEXT:    vaddudm v5, v5, v9
; PWR9LE-NEXT:    vaddudm v3, v3, v7
; PWR9LE-NEXT:    vaddudm v3, v3, v5
; PWR9LE-NEXT:    vaddudm v2, v2, v4
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    vaddudm v4, v4, v8
; PWR9BE-NEXT:    vaddudm v2, v2, v6
; PWR9BE-NEXT:    vaddudm v5, v5, v9
; PWR9BE-NEXT:    vaddudm v3, v3, v7
; PWR9BE-NEXT:    vaddudm v3, v3, v5
; PWR9BE-NEXT:    vaddudm v2, v2, v4
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    vaddudm v4, v4, v8
; PWR10LE-NEXT:    vaddudm v5, v5, v9
; PWR10LE-NEXT:    vaddudm v3, v3, v7
; PWR10LE-NEXT:    vaddudm v3, v3, v5
; PWR10LE-NEXT:    vaddudm v2, v2, v6
; PWR10LE-NEXT:    vaddudm v2, v2, v4
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    vaddudm v4, v4, v8
; PWR10BE-NEXT:    vaddudm v5, v5, v9
; PWR10BE-NEXT:    vaddudm v3, v3, v7
; PWR10BE-NEXT:    vaddudm v3, v3, v5
; PWR10BE-NEXT:    vaddudm v2, v2, v6
; PWR10BE-NEXT:    vaddudm v2, v2, v4
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a)
  ret i64 %0
}

;; Sign-extends all sixteen i8 lanes of %a to i64 and add-reduces the widened
;; vector to one i64.
;; The expected code widens in eight pieces: each is a vperm whose control
;; vector is loaded from one of .LCPI23_0 .. .LCPI23_7, followed by vextsb2d.
;; The eight results are summed with a tree of vaddudm, folded with
;; xxswapd + vaddudm, and moved to r3 with mfvsrld (little-endian) or mfvsrd
;; (big-endian).
;; Only the pwr10 little-endian run loads the constants PC-relatively (plxv);
;; the other runs form the address with addis/addi from r2 and use lxv.
define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i64_sign:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
; PWR9LE-NEXT:    lxv v3, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
; PWR9LE-NEXT:    lxv v4, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
; PWR9LE-NEXT:    vperm v3, v2, v2, v3
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
; PWR9LE-NEXT:    lxv v5, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
; PWR9LE-NEXT:    vextsb2d v3, v3
; PWR9LE-NEXT:    vperm v4, v2, v2, v4
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
; PWR9LE-NEXT:    lxv v0, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
; PWR9LE-NEXT:    vextsb2d v4, v4
; PWR9LE-NEXT:    vperm v5, v2, v2, v5
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
; PWR9LE-NEXT:    vaddudm v3, v4, v3
; PWR9LE-NEXT:    lxv v1, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
; PWR9LE-NEXT:    vextsb2d v5, v5
; PWR9LE-NEXT:    vperm v0, v2, v2, v0
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
; PWR9LE-NEXT:    lxv v6, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
; PWR9LE-NEXT:    vperm v1, v2, v2, v1
; PWR9LE-NEXT:    vextsb2d v0, v0
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
; PWR9LE-NEXT:    vaddudm v5, v0, v5
; PWR9LE-NEXT:    lxv v7, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
; PWR9LE-NEXT:    vperm v6, v2, v2, v6
; PWR9LE-NEXT:    vextsb2d v1, v1
; PWR9LE-NEXT:    vaddudm v3, v3, v5
; PWR9LE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
; PWR9LE-NEXT:    lxv v8, 0(r3)
; PWR9LE-NEXT:    vextsb2d v6, v6
; PWR9LE-NEXT:    vperm v7, v2, v2, v7
; PWR9LE-NEXT:    vaddudm v1, v6, v1
; PWR9LE-NEXT:    vextsb2d v7, v7
; PWR9LE-NEXT:    vperm v2, v2, v2, v8
; PWR9LE-NEXT:    vextsb2d v2, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v7
; PWR9LE-NEXT:    vaddudm v2, v1, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i64_sign:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
; PWR9BE-NEXT:    lxv v3, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
; PWR9BE-NEXT:    lxv v4, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
; PWR9BE-NEXT:    vperm v3, v2, v2, v3
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
; PWR9BE-NEXT:    lxv v5, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
; PWR9BE-NEXT:    vextsb2d v3, v3
; PWR9BE-NEXT:    vperm v4, v2, v2, v4
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
; PWR9BE-NEXT:    lxv v0, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
; PWR9BE-NEXT:    vextsb2d v4, v4
; PWR9BE-NEXT:    vperm v5, v2, v2, v5
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
; PWR9BE-NEXT:    vaddudm v3, v4, v3
; PWR9BE-NEXT:    lxv v1, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
; PWR9BE-NEXT:    vextsb2d v5, v5
; PWR9BE-NEXT:    vperm v0, v2, v2, v0
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
; PWR9BE-NEXT:    lxv v6, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
; PWR9BE-NEXT:    vperm v1, v2, v2, v1
; PWR9BE-NEXT:    vextsb2d v0, v0
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
; PWR9BE-NEXT:    vaddudm v5, v0, v5
; PWR9BE-NEXT:    lxv v7, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
; PWR9BE-NEXT:    vperm v6, v2, v2, v6
; PWR9BE-NEXT:    vextsb2d v1, v1
; PWR9BE-NEXT:    vaddudm v3, v3, v5
; PWR9BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
; PWR9BE-NEXT:    lxv v8, 0(r3)
; PWR9BE-NEXT:    vextsb2d v6, v6
; PWR9BE-NEXT:    vperm v7, v2, v2, v7
; PWR9BE-NEXT:    vaddudm v1, v6, v1
; PWR9BE-NEXT:    vextsb2d v7, v7
; PWR9BE-NEXT:    vperm v2, v2, v2, v8
; PWR9BE-NEXT:    vextsb2d v2, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v7
; PWR9BE-NEXT:    vaddudm v2, v1, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i64_sign:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    plxv v3, .LCPI23_0@PCREL(0), 1
; PWR10LE-NEXT:    plxv v4, .LCPI23_1@PCREL(0), 1
; PWR10LE-NEXT:    vperm v3, v2, v2, v3
; PWR10LE-NEXT:    plxv v5, .LCPI23_2@PCREL(0), 1
; PWR10LE-NEXT:    plxv v0, .LCPI23_3@PCREL(0), 1
; PWR10LE-NEXT:    plxv v1, .LCPI23_4@PCREL(0), 1
; PWR10LE-NEXT:    plxv v6, .LCPI23_5@PCREL(0), 1
; PWR10LE-NEXT:    plxv v7, .LCPI23_6@PCREL(0), 1
; PWR10LE-NEXT:    plxv v8, .LCPI23_7@PCREL(0), 1
; PWR10LE-NEXT:    vperm v4, v2, v2, v4
; PWR10LE-NEXT:    vperm v5, v2, v2, v5
; PWR10LE-NEXT:    vperm v0, v2, v2, v0
; PWR10LE-NEXT:    vperm v1, v2, v2, v1
; PWR10LE-NEXT:    vperm v6, v2, v2, v6
; PWR10LE-NEXT:    vperm v7, v2, v2, v7
; PWR10LE-NEXT:    vperm v2, v2, v2, v8
; PWR10LE-NEXT:    vextsb2d v5, v5
; PWR10LE-NEXT:    vextsb2d v0, v0
; PWR10LE-NEXT:    vextsb2d v7, v7
; PWR10LE-NEXT:    vextsb2d v2, v2
; PWR10LE-NEXT:    vextsb2d v3, v3
; PWR10LE-NEXT:    vextsb2d v4, v4
; PWR10LE-NEXT:    vextsb2d v1, v1
; PWR10LE-NEXT:    vextsb2d v6, v6
; PWR10LE-NEXT:    vaddudm v2, v2, v7
; PWR10LE-NEXT:    vaddudm v5, v0, v5
; PWR10LE-NEXT:    vaddudm v3, v4, v3
; PWR10LE-NEXT:    vaddudm v3, v3, v5
; PWR10LE-NEXT:    vaddudm v4, v6, v1
; PWR10LE-NEXT:    vaddudm v2, v4, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i64_sign:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
; PWR10BE-NEXT:    lxv v3, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
; PWR10BE-NEXT:    lxv v4, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
; PWR10BE-NEXT:    vperm v3, v2, v2, v3
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
; PWR10BE-NEXT:    vextsb2d v3, v3
; PWR10BE-NEXT:    lxv v5, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
; PWR10BE-NEXT:    vperm v4, v2, v2, v4
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
; PWR10BE-NEXT:    vextsb2d v4, v4
; PWR10BE-NEXT:    lxv v0, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
; PWR10BE-NEXT:    vperm v5, v2, v2, v5
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
; PWR10BE-NEXT:    vextsb2d v5, v5
; PWR10BE-NEXT:    lxv v1, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
; PWR10BE-NEXT:    vperm v0, v2, v2, v0
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
; PWR10BE-NEXT:    vextsb2d v0, v0
; PWR10BE-NEXT:    lxv v6, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
; PWR10BE-NEXT:    vperm v1, v2, v2, v1
; PWR10BE-NEXT:    vaddudm v5, v0, v5
; PWR10BE-NEXT:    vaddudm v3, v4, v3
; PWR10BE-NEXT:    vaddudm v3, v3, v5
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
; PWR10BE-NEXT:    vextsb2d v1, v1
; PWR10BE-NEXT:    lxv v7, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
; PWR10BE-NEXT:    vperm v6, v2, v2, v6
; PWR10BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
; PWR10BE-NEXT:    vextsb2d v6, v6
; PWR10BE-NEXT:    lxv v8, 0(r3)
; PWR10BE-NEXT:    vperm v7, v2, v2, v7
; PWR10BE-NEXT:    vextsb2d v7, v7
; PWR10BE-NEXT:    vperm v2, v2, v2, v8
; PWR10BE-NEXT:    vextsb2d v2, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v7
; PWR10BE-NEXT:    vaddudm v4, v6, v1
; PWR10BE-NEXT:    vaddudm v2, v4, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = sext <16 x i8> %a to <16 x i64>
  %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
  ret i64 %1
}

;; Zero-extends all sixteen i8 lanes of %a to i64 and add-reduces the widened
;; vector to one i64.
;; The expected code has no separate extend instruction: each of the eight
;; pieces is a single vperm that takes a zeroed register (xxlxor v4, v4, v4)
;; and the input, with its control vector loaded from one of
;; .LCPI24_0 .. .LCPI24_7. The eight results are summed with a tree of
;; vaddudm, folded with xxswapd + vaddudm, and moved to r3 with mfvsrld
;; (little-endian) or mfvsrd (big-endian).
;; Only the pwr10 little-endian run loads the constants PC-relatively (plxv);
;; the other runs form the address with addis/addi from r2 and use lxv.
define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16i8tov16i64_zero:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
; PWR9LE-NEXT:    xxlxor v4, v4, v4
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
; PWR9LE-NEXT:    lxv v3, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
; PWR9LE-NEXT:    lxv v5, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
; PWR9LE-NEXT:    vperm v3, v4, v2, v3
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
; PWR9LE-NEXT:    lxv v0, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
; PWR9LE-NEXT:    vperm v5, v4, v2, v5
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
; PWR9LE-NEXT:    lxv v1, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
; PWR9LE-NEXT:    vaddudm v3, v5, v3
; PWR9LE-NEXT:    vperm v0, v4, v2, v0
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
; PWR9LE-NEXT:    lxv v6, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
; PWR9LE-NEXT:    vperm v1, v4, v2, v1
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
; PWR9LE-NEXT:    lxv v7, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
; PWR9LE-NEXT:    vaddudm v0, v1, v0
; PWR9LE-NEXT:    vperm v6, v4, v2, v6
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
; PWR9LE-NEXT:    lxv v8, 0(r3)
; PWR9LE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
; PWR9LE-NEXT:    vaddudm v3, v3, v0
; PWR9LE-NEXT:    vperm v7, v4, v2, v7
; PWR9LE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
; PWR9LE-NEXT:    lxv v9, 0(r3)
; PWR9LE-NEXT:    vperm v8, v4, v2, v8
; PWR9LE-NEXT:    vperm v2, v4, v2, v9
; PWR9LE-NEXT:    vaddudm v4, v7, v6
; PWR9LE-NEXT:    vaddudm v2, v2, v8
; PWR9LE-NEXT:    vaddudm v2, v4, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    vaddudm v2, v2, v3
; PWR9LE-NEXT:    mfvsrld r3, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16i8tov16i64_zero:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
; PWR9BE-NEXT:    xxlxor v4, v4, v4
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
; PWR9BE-NEXT:    lxv v3, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
; PWR9BE-NEXT:    lxv v5, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
; PWR9BE-NEXT:    vperm v3, v4, v2, v3
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
; PWR9BE-NEXT:    lxv v0, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
; PWR9BE-NEXT:    vperm v5, v4, v2, v5
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
; PWR9BE-NEXT:    lxv v1, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
; PWR9BE-NEXT:    vaddudm v3, v5, v3
; PWR9BE-NEXT:    vperm v0, v4, v2, v0
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
; PWR9BE-NEXT:    lxv v6, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
; PWR9BE-NEXT:    vperm v1, v4, v2, v1
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
; PWR9BE-NEXT:    lxv v7, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
; PWR9BE-NEXT:    vaddudm v0, v1, v0
; PWR9BE-NEXT:    vperm v6, v4, v2, v6
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
; PWR9BE-NEXT:    lxv v8, 0(r3)
; PWR9BE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
; PWR9BE-NEXT:    vaddudm v3, v3, v0
; PWR9BE-NEXT:    vperm v7, v4, v2, v7
; PWR9BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
; PWR9BE-NEXT:    lxv v9, 0(r3)
; PWR9BE-NEXT:    vperm v8, v4, v2, v8
; PWR9BE-NEXT:    vperm v2, v4, v2, v9
; PWR9BE-NEXT:    vaddudm v4, v7, v6
; PWR9BE-NEXT:    vaddudm v2, v2, v8
; PWR9BE-NEXT:    vaddudm v2, v4, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    vaddudm v2, v2, v3
; PWR9BE-NEXT:    mfvsrd r3, v2
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16i8tov16i64_zero:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    plxv v3, .LCPI24_0@PCREL(0), 1
; PWR10LE-NEXT:    plxv v5, .LCPI24_1@PCREL(0), 1
; PWR10LE-NEXT:    xxlxor v4, v4, v4
; PWR10LE-NEXT:    vperm v3, v4, v2, v3
; PWR10LE-NEXT:    plxv v0, .LCPI24_2@PCREL(0), 1
; PWR10LE-NEXT:    plxv v1, .LCPI24_3@PCREL(0), 1
; PWR10LE-NEXT:    plxv v6, .LCPI24_4@PCREL(0), 1
; PWR10LE-NEXT:    plxv v7, .LCPI24_5@PCREL(0), 1
; PWR10LE-NEXT:    plxv v8, .LCPI24_6@PCREL(0), 1
; PWR10LE-NEXT:    plxv v9, .LCPI24_7@PCREL(0), 1
; PWR10LE-NEXT:    vperm v5, v4, v2, v5
; PWR10LE-NEXT:    vperm v0, v4, v2, v0
; PWR10LE-NEXT:    vperm v1, v4, v2, v1
; PWR10LE-NEXT:    vperm v6, v4, v2, v6
; PWR10LE-NEXT:    vperm v7, v4, v2, v7
; PWR10LE-NEXT:    vperm v8, v4, v2, v8
; PWR10LE-NEXT:    vperm v2, v4, v2, v9
; PWR10LE-NEXT:    vaddudm v2, v2, v8
; PWR10LE-NEXT:    vaddudm v4, v1, v0
; PWR10LE-NEXT:    vaddudm v3, v5, v3
; PWR10LE-NEXT:    vaddudm v3, v3, v4
; PWR10LE-NEXT:    vaddudm v4, v7, v6
; PWR10LE-NEXT:    vaddudm v2, v4, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    vaddudm v2, v2, v3
; PWR10LE-NEXT:    mfvsrld r3, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16i8tov16i64_zero:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
; PWR10BE-NEXT:    xxlxor v4, v4, v4
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
; PWR10BE-NEXT:    lxv v3, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
; PWR10BE-NEXT:    lxv v5, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
; PWR10BE-NEXT:    vperm v3, v4, v2, v3
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
; PWR10BE-NEXT:    lxv v0, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
; PWR10BE-NEXT:    vperm v5, v4, v2, v5
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
; PWR10BE-NEXT:    lxv v1, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
; PWR10BE-NEXT:    vperm v0, v4, v2, v0
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
; PWR10BE-NEXT:    lxv v6, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
; PWR10BE-NEXT:    vperm v1, v4, v2, v1
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
; PWR10BE-NEXT:    lxv v7, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
; PWR10BE-NEXT:    vperm v6, v4, v2, v6
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
; PWR10BE-NEXT:    lxv v8, 0(r3)
; PWR10BE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
; PWR10BE-NEXT:    vperm v7, v4, v2, v7
; PWR10BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
; PWR10BE-NEXT:    lxv v9, 0(r3)
; PWR10BE-NEXT:    vperm v8, v4, v2, v8
; PWR10BE-NEXT:    vperm v2, v4, v2, v9
; PWR10BE-NEXT:    vaddudm v4, v1, v0
; PWR10BE-NEXT:    vaddudm v3, v5, v3
; PWR10BE-NEXT:    vaddudm v3, v3, v4
; PWR10BE-NEXT:    vaddudm v2, v2, v8
; PWR10BE-NEXT:    vaddudm v4, v7, v6
; PWR10BE-NEXT:    vaddudm v2, v4, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    vaddudm v2, v2, v3
; PWR10BE-NEXT:    mfvsrd r3, v2
; PWR10BE-NEXT:    blr
entry:
  %0 = zext <16 x i8> %a to <16 x i64>
  %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
  ret i64 %1
}

;; Declarations of the i64 add-reduction intrinsics, one per source vector
;; width (2, 4, 8 and 16 elements). All carry attribute group #0.
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #0
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #0
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #0
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) #0

;; Attribute group #0, referenced by the function definitions and intrinsic
;; declarations in this file: nounwind only.
attributes #0 = { nounwind }