; llvm/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)
declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.smin.v3i16(<3 x i16>)
declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smin.v3i32(<3 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.smin.v16i32(<16 x i32>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.smin.v2i128(<2 x i128>)
declare i8 @llvm.vector.reduce.smax.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)
declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.smax.v3i16(<3 x i16>)
declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smax.v3i32(<3 x i32>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.smax.v16i32(<16 x i32>)
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.smax.v2i128(<2 x i128>)
declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.umin.v3i16(<3 x i16>)
declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umin.v3i32(<3 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.umin.v2i128(<2 x i128>)
declare i8 @llvm.vector.reduce.umax.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)
declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.umax.v3i16(<3 x i16>)
declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)
declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v3i64(<3 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128>)

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

; Signed-max reduction of a <16 x i8> loaded from %arr. Both llc runs share the
; same expected output: one q-register load, a single SMAXV over the 16 byte
; lanes, and an fmov of the result into w0.
define i8 @smax_B(ptr nocapture readonly %arr)  {
; CHECK-LABEL: smax_B:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    smaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <16 x i8>, ptr %arr
  %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; Signed-max reduction of a <8 x i16> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single SMAXV over 8
; halfword lanes.
define i16 @smax_H(ptr nocapture readonly %arr) {
; CHECK-LABEL: smax_H:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    smaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <8 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; Signed-max reduction of a <4 x i32> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single SMAXV over 4 word
; lanes.
define i32 @smax_S(ptr nocapture readonly %arr)  {
; CHECK-LABEL: smax_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    smaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <4 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; Unsigned-max reduction of a <16 x i8> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMAXV over 16 byte
; lanes.
define i8 @umax_B(ptr nocapture readonly %arr)  {
; CHECK-LABEL: umax_B:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <16 x i8>, ptr %arr
  %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; Unsigned-max reduction of a <8 x i16> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMAXV over 8
; halfword lanes.
define i16 @umax_H(ptr nocapture readonly %arr)  {
; CHECK-LABEL: umax_H:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <8 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; Unsigned-max reduction of a <4 x i32> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMAXV over 4 word
; lanes.
define i32 @umax_S(ptr nocapture readonly %arr) {
; CHECK-LABEL: umax_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <4 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; Signed-min reduction of a <16 x i8> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single SMINV over 16 byte
; lanes.
define i8 @smin_B(ptr nocapture readonly %arr) {
; CHECK-LABEL: smin_B:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    sminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <16 x i8>, ptr %arr
  %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; Signed-min reduction of a <8 x i16> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single SMINV over 8
; halfword lanes.
define i16 @smin_H(ptr nocapture readonly %arr) {
; CHECK-LABEL: smin_H:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    sminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <8 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; Signed-min reduction of a <4 x i32> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single SMINV over 4 word
; lanes.
define i32 @smin_S(ptr nocapture readonly %arr) {
; CHECK-LABEL: smin_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    sminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <4 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; Unsigned-min reduction of a <16 x i8> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMINV over 16 byte
; lanes.
define i8 @umin_B(ptr nocapture readonly %arr)  {
; CHECK-LABEL: umin_B:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <16 x i8>, ptr %arr
  %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %arr.load)
  ret i8 %r
}

; Unsigned-min reduction of a <8 x i16> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMINV over 8
; halfword lanes.
define i16 @umin_H(ptr nocapture readonly %arr)  {
; CHECK-LABEL: umin_H:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <8 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %arr.load)
  ret i16 %r
}

; Unsigned-min reduction of a <4 x i32> loaded from %arr. Both llc runs are
; expected to emit a q-register load followed by a single UMINV over 4 word
; lanes.
define i32 @umin_S(ptr nocapture readonly %arr) {
; CHECK-LABEL: umin_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %arr.load = load <4 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %arr.load)
  ret i32 %r
}

; Floating-point max reduction of a <4 x float> loaded from %arr; the intrinsic
; call carries the nnan fast-math flag. Both llc runs are expected to emit
; FMAXNMV. No fmov follows because the float result is returned in s0.
define float @fmaxnm_S(ptr nocapture readonly %arr) {
; CHECK-LABEL: fmaxnm_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fmaxnmv s0, v0.4s
; CHECK-NEXT:    ret
  %arr.load  = load <4 x float>, ptr %arr
  %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arr.load)
  ret float %r
}

; Floating-point min reduction of a <4 x float> loaded from %arr; the intrinsic
; call carries the nnan fast-math flag. Both llc runs are expected to emit
; FMINNMV. No fmov follows because the float result is returned in s0.
define float @fminnm_S(ptr nocapture readonly %arr) {
; CHECK-LABEL: fminnm_S:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    fminnmv s0, v0.4s
; CHECK-NEXT:    ret
  %arr.load  = load <4 x float>, ptr %arr
  %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arr.load)
  ret float %r
}

; Unsigned-max reduction of a 256-bit <16 x i16>, i.e. two q registers' worth
; of data. Expected code loads both halves with ldp, merges them with one
; element-wise UMAX, then finishes with UMAXV. The two selectors differ only in
; register assignment and UMAX operand order.
define i16 @oversized_umax_256(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_umax_256:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q1, q0, [x0]
; CHECK-SD-NEXT:    umax v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    umaxv h0, v0.8h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_umax_256:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    umax v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    umaxv h0, v0.8h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

; Unsigned-max reduction of a 512-bit <16 x i32>, i.e. four q registers' worth
; of data. Expected code uses two ldp loads, three element-wise UMAX to fold
; four registers into one, then UMAXV. The selectors differ in load order and
; in which register pairs are combined first.
define i32 @oversized_umax_512(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_umax_512:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q0, q1, [x0, #32]
; CHECK-SD-NEXT:    ldp q2, q3, [x0]
; CHECK-SD-NEXT:    umax v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT:    umax v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT:    umax v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    umaxv s0, v0.4s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_umax_512:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-GI-NEXT:    umax v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    umax v1.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    umax v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    umaxv s0, v0.4s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

; Unsigned-min reduction of a 256-bit <16 x i16>. Expected code loads both
; halves with ldp, merges them with one element-wise UMIN, then finishes with
; UMINV. The two selectors differ only in register assignment and UMIN operand
; order.
define i16 @oversized_umin_256(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_umin_256:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q1, q0, [x0]
; CHECK-SD-NEXT:    umin v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    uminv h0, v0.8h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_umin_256:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    umin v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    uminv h0, v0.8h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

; Unsigned-min reduction of a 512-bit <16 x i32>. Expected code uses two ldp
; loads, three element-wise UMIN to fold four registers into one, then UMINV.
; The selectors differ in load order and in which register pairs are combined
; first.
define i32 @oversized_umin_512(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_umin_512:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q0, q1, [x0, #32]
; CHECK-SD-NEXT:    ldp q2, q3, [x0]
; CHECK-SD-NEXT:    umin v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT:    umin v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT:    umin v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    uminv s0, v0.4s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_umin_512:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    umin v1.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    umin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    uminv s0, v0.4s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

; Signed-max reduction of a 256-bit <16 x i16>. Expected code loads both
; halves with ldp, merges them with one element-wise SMAX, then finishes with
; SMAXV. The two selectors differ only in register assignment and SMAX operand
; order.
define i16 @oversized_smax_256(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_smax_256:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q1, q0, [x0]
; CHECK-SD-NEXT:    smax v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    smaxv h0, v0.8h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_smax_256:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    smax v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    smaxv h0, v0.8h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}

; Signed-max reduction of a 512-bit <16 x i32>. Expected code uses two ldp
; loads, three element-wise SMAX to fold four registers into one, then SMAXV.
; The selectors differ in load order and in which register pairs are combined
; first.
define i32 @oversized_smax_512(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_smax_512:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q0, q1, [x0, #32]
; CHECK-SD-NEXT:    ldp q2, q3, [x0]
; CHECK-SD-NEXT:    smax v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT:    smax v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT:    smax v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    smaxv s0, v0.4s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_smax_512:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    smax v1.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    smax v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    smaxv s0, v0.4s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

; Signed-min reduction of a 256-bit <16 x i16>. Expected code loads both
; halves with ldp, merges them with one element-wise SMIN, then finishes with
; SMINV. The two selectors differ only in register assignment and SMIN operand
; order.
define i16 @oversized_smin_256(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_smin_256:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q1, q0, [x0]
; CHECK-SD-NEXT:    smin v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    sminv h0, v0.8h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_smin_256:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    smin v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    sminv h0, v0.8h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i16>, ptr %arr
  %r = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %arr.load)
  ret i16 %r
}


; Signed-min reduction of a 512-bit <16 x i32>. Expected code uses two ldp
; loads, three element-wise SMIN to fold four registers into one, then SMINV.
; The selectors differ in load order and in which register pairs are combined
; first.
define i32 @oversized_smin_512(ptr nocapture readonly %arr)  {
; CHECK-SD-LABEL: oversized_smin_512:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q0, q1, [x0, #32]
; CHECK-SD-NEXT:    ldp q2, q3, [x0]
; CHECK-SD-NEXT:    smin v1.4s, v3.4s, v1.4s
; CHECK-SD-NEXT:    smin v0.4s, v2.4s, v0.4s
; CHECK-SD-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    sminv s0, v0.4s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: oversized_smin_512:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp q0, q1, [x0]
; CHECK-GI-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    smin v1.4s, v2.4s, v3.4s
; CHECK-GI-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    sminv s0, v0.4s
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
  %arr.load = load <16 x i32>, ptr %arr
  %r = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %arr.load)
  ret i32 %r
}

; Signed-min reduction of a <2 x i8> argument. The default selector
; sign-extends each 32-bit lane from 8 bits (shl/sshr by 24) and uses a
; pairwise SMINP. GlobalISel instead moves both lanes to GPRs, compares them as
; sign-extended bytes and picks the result with fcsel.
define i8 @sminv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: sminv_v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    sminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    sxtb w8, w8
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, sxtb
; CHECK-GI-NEXT:    fcsel s0, s0, s1, lt
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <3 x i8> argument, whose elements arrive in w0-w2.
; The default selector inserts them into a 4 x i16 vector pre-filled with 127
; (the largest i8, so the spare lane cannot win the minimum), sign-extends from
; 8 bits and uses SMINV. GlobalISel uses a scalar compare/csel chain.
define i8 @sminv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: sminv_v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v0.4h, #127
; CHECK-SD-NEXT:    mov v0.h[0], w0
; CHECK-SD-NEXT:    mov v0.h[1], w1
; CHECK-SD-NEXT:    mov v0.h[2], w2
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sminv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    cmp w8, w1, sxtb
; CHECK-GI-NEXT:    csel w8, w0, w1, lt
; CHECK-GI-NEXT:    sxtb w9, w8
; CHECK-GI-NEXT:    cmp w9, w2, sxtb
; CHECK-GI-NEXT:    csel w0, w8, w2, lt
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <4 x i8> argument. The default selector
; sign-extends each 16-bit lane from 8 bits (shl/sshr by 8) and uses SMINV on
; four halfword lanes. GlobalISel extracts all four lanes with umov and reduces
; them with a tree of sign-extended byte compares and csel.
define i8 @sminv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: sminv_v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sminv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    umov w8, v0.h[0]
; CHECK-GI-NEXT:    umov w9, v0.h[1]
; CHECK-GI-NEXT:    umov w10, v0.h[2]
; CHECK-GI-NEXT:    umov w12, v0.h[3]
; CHECK-GI-NEXT:    sxtb w11, w8
; CHECK-GI-NEXT:    cmp w11, w9, sxtb
; CHECK-GI-NEXT:    sxtb w11, w10
; CHECK-GI-NEXT:    csel w8, w8, w9, lt
; CHECK-GI-NEXT:    cmp w11, w12, sxtb
; CHECK-GI-NEXT:    sxtb w9, w8
; CHECK-GI-NEXT:    csel w10, w10, w12, lt
; CHECK-GI-NEXT:    cmp w9, w10, sxtb
; CHECK-GI-NEXT:    csel w0, w8, w10, lt
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <8 x i8> argument (one 64-bit vector register).
; Both llc runs are expected to emit a single SMINV over 8 byte lanes.
define i8 @sminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: sminv_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminv b0, v0.8b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <16 x i8> argument (one 128-bit vector register).
; Both llc runs are expected to emit a single SMINV over 16 byte lanes.
define i8 @sminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: sminv_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <32 x i8> argument, which arrives split across v0
; and v1. Both llc runs are expected to merge the halves with one element-wise
; SMIN and then reduce with SMINV.
define i8 @sminv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: sminv_v32i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smin v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; Signed-min reduction of a <2 x i16> argument. The default selector
; sign-extends each 32-bit lane from 16 bits (shl/sshr by 16) and uses a
; pairwise SMINP. GlobalISel moves both lanes to GPRs, compares them as
; sign-extended halfwords and picks the result with fcsel.
define i16 @sminv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: sminv_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    sminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    sxth w8, w8
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, sxth
; CHECK-GI-NEXT:    fcsel s0, s0, s1, lt
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; Signed-min reduction of a <3 x i16> argument. Both llc runs are expected to
; write 32767 (the largest i16, so it cannot win the minimum) into the unused
; lane 3 and then reduce all four halfword lanes with SMINV.
define i16 @sminv_v3i16(<3 x i16> %a) {
; CHECK-LABEL: sminv_v3i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w8, #32767 // =0x7fff
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    sminv h0, v0.4h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; Signed-min reduction of a <4 x i16> argument (one 64-bit vector register).
; Both llc runs are expected to emit a single SMINV over 4 halfword lanes.
define i16 @sminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: sminv_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminv h0, v0.4h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; Signed-min reduction of a <8 x i16> argument (one 128-bit vector register).
; Both llc runs are expected to emit a single SMINV over 8 halfword lanes.
define i16 @sminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: sminv_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; Signed-min reduction of a <16 x i16> argument, which arrives split across v0
; and v1. Both llc runs are expected to merge the halves with one element-wise
; SMIN and then reduce with SMINV.
define i16 @sminv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: sminv_v16i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; Signed-min reduction of a <2 x i32> argument. Both llc runs are expected to
; use a pairwise SMINP of the register with itself rather than an across-lanes
; instruction.
define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: sminv_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; Signed-min reduction of a <3 x i32> argument. Both llc runs are expected to
; write 0x7fffffff (the largest i32, so it cannot win the minimum) into the
; unused lane 3 and then reduce all four word lanes with SMINV.
define i32 @sminv_v3i32(<3 x i32> %a) {
; CHECK-LABEL: sminv_v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    sminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; Signed-min reduction of a <4 x i32> argument (one 128-bit vector register).
; Both llc runs are expected to emit a single SMINV over 4 word lanes.
define i32 @sminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: sminv_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; Signed-min reduction of a <8 x i32> argument, which arrives split across v0
; and v1. Both llc runs are expected to merge the halves with one element-wise
; SMIN and then reduce with SMINV.
define i32 @sminv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: sminv_v8i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; Signed-min reduction of a <2 x i64> argument. Neither selector's expected
; output uses an across-lanes min instruction for 64-bit elements. The default
; selector brings the high lane down with ext and selects via cmgt + bif.
; GlobalISel moves both lanes to GPRs, compares them and selects with fcsel.
define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: sminv_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; Signed-min reduction of a <3 x i64> argument, whose elements arrive in d0,
; d1 and d2. Both selectors first pack them into two 2-lane vectors, {a0, a1}
; and {a2, INT64_MAX}, and take the element-wise minimum with cmgt + bif. The
; INT64_MAX padding cannot win the minimum. The final 2-lane step then differs:
; ext/cmgt/bif for the default selector, GPR compare + fcsel for GlobalISel.
define i64 @sminv_v3i64(<3 x i64> %a) {
; CHECK-SD-LABEL: sminv_v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    mov v2.d[1], x8
; CHECK-SD-NEXT:    cmgt v1.2d, v2.2d, v0.2d
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v2.d[1], x8
; CHECK-GI-NEXT:    cmgt v1.2d, v2.2d, v0.2d
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smin.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; Signed-min reduction of a <4 x i64> argument, which arrives split across v0
; and v1. Both selectors first take the element-wise minimum of the two halves
; with cmgt + bif. The final 2-lane step then differs: ext/cmgt/bif for the
; default selector, GPR compare + fcsel for GlobalISel.
define i64 @sminv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: sminv_v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmgt v2.2d, v1.2d, v0.2d
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmgt v2.2d, v1.2d, v0.2d
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; Signed-min reduction of a <2 x i128> argument, handled entirely in GPRs: the
; elements arrive as x0:x1 and x2:x3. The default selector does a 128-bit
; signed compare with cmp + sbcs and selects both halves on "lt". GlobalISel
; compares the high halves signed and the low halves unsigned, uses the
; low-half result only when the high halves are equal, and selects on that.
define i128 @sminv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: sminv_v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmp x0, x2
; CHECK-SD-NEXT:    sbcs xzr, x1, x3
; CHECK-SD-NEXT:    csel x0, x0, x2, lt
; CHECK-SD-NEXT:    csel x1, x1, x3, lt
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sminv_v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    cset w8, lt
; CHECK-GI-NEXT:    cmp x0, x2
; CHECK-GI-NEXT:    cset w9, lo
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    csel w8, w9, w8, eq
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    csel x0, x0, x2, ne
; CHECK-GI-NEXT:    csel x1, x1, x3, ne
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.smin.v2i128(<2 x i128> %a)
  ret i128 %arg1
}

; Signed-max reduction of a <2 x i8> argument. The default selector
; sign-extends each 32-bit lane from 8 bits (shl/sshr by 24) and uses a
; pairwise SMAXP. GlobalISel moves both lanes to GPRs, compares them as
; sign-extended bytes and picks the result with fcsel on "gt".
define i8 @smaxv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: smaxv_v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    smaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    sxtb w8, w8
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, sxtb
; CHECK-GI-NEXT:    fcsel s0, s0, s1, gt
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <3 x i8> argument, whose elements arrive in w0-w2.
; The default selector inserts them into a 4 x i16 vector pre-filled with 128
; (0x80, which the following shl/sshr pair turns into -128, the smallest i8, so
; the spare lane cannot win the maximum) and uses SMAXV. GlobalISel uses a
; scalar compare/csel chain.
define i8 @smaxv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: smaxv_v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v0.4h, #128
; CHECK-SD-NEXT:    mov v0.h[0], w0
; CHECK-SD-NEXT:    mov v0.h[1], w1
; CHECK-SD-NEXT:    mov v0.h[2], w2
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    smaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxtb w8, w0
; CHECK-GI-NEXT:    cmp w8, w1, sxtb
; CHECK-GI-NEXT:    csel w8, w0, w1, gt
; CHECK-GI-NEXT:    sxtb w9, w8
; CHECK-GI-NEXT:    cmp w9, w2, sxtb
; CHECK-GI-NEXT:    csel w0, w8, w2, gt
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <4 x i8> argument. The default selector
; sign-extends each 16-bit lane from 8 bits (shl/sshr by 8) and uses SMAXV on
; four halfword lanes. GlobalISel extracts all four lanes with umov and reduces
; them with a tree of sign-extended byte compares and csel on "gt".
define i8 @smaxv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: smaxv_v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT:    smaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    umov w8, v0.h[0]
; CHECK-GI-NEXT:    umov w9, v0.h[1]
; CHECK-GI-NEXT:    umov w10, v0.h[2]
; CHECK-GI-NEXT:    umov w12, v0.h[3]
; CHECK-GI-NEXT:    sxtb w11, w8
; CHECK-GI-NEXT:    cmp w11, w9, sxtb
; CHECK-GI-NEXT:    sxtb w11, w10
; CHECK-GI-NEXT:    csel w8, w8, w9, gt
; CHECK-GI-NEXT:    cmp w11, w12, sxtb
; CHECK-GI-NEXT:    sxtb w9, w8
; CHECK-GI-NEXT:    csel w10, w10, w12, gt
; CHECK-GI-NEXT:    cmp w9, w10, sxtb
; CHECK-GI-NEXT:    csel w0, w8, w10, gt
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <8 x i8> argument (one 64-bit vector register).
; Both llc runs are expected to emit a single SMAXV over 8 byte lanes.
define i8 @smaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: smaxv_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxv b0, v0.8b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <16 x i8> argument (one 128-bit vector register).
; Both llc runs are expected to emit a single SMAXV over 16 byte lanes.
define i8 @smaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: smaxv_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <32 x i8> argument, which arrives split across v0
; and v1. Both llc runs are expected to merge the halves with one element-wise
; SMAX and then reduce with SMAXV.
define i8 @smaxv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: smaxv_v32i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    smaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; Signed-max reduction of a <2 x i16> argument. The default selector
; sign-extends each 32-bit lane from 16 bits (shl/sshr by 16) and uses a
; pairwise SMAXP. GlobalISel moves both lanes to GPRs, compares them as
; sign-extended halfwords and picks the result with fcsel on "gt".
define i16 @smaxv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: smaxv_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-SD-NEXT:    smaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    sxth w8, w8
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, sxth
; CHECK-GI-NEXT:    fcsel s0, s0, s1, gt
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; Signed-max reduction of a <3 x i16> argument. Both selectors write a value
; whose low 16 bits are 0x8000 (-32768, the smallest i16, so it cannot win the
; maximum) into the unused lane 3 and reduce with SMAXV. The selectors differ
; only in the immediate used to materialise it: #-32768 versus #32768.
define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: smaxv_v3i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov w8, #-32768 // =0xffff8000
; CHECK-SD-NEXT:    mov v0.h[3], w8
; CHECK-SD-NEXT:    smaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v3i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, #32768 // =0x8000
; CHECK-GI-NEXT:    mov v0.h[3], w8
; CHECK-GI-NEXT:    smaxv h0, v0.4h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; Signed-max reduction of a <4 x i16> argument (one 64-bit vector register).
; Both llc runs are expected to emit a single SMAXV over 4 halfword lanes.
define i16 @smaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: smaxv_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxv h0, v0.4h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; Signed-max reduction of a <8 x i16> argument (one 128-bit vector register).
; Both llc runs are expected to emit a single SMAXV over 8 halfword lanes.
define i16 @smaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: smaxv_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; Signed-max reduction of <16 x i16>. Both llc runs share one expectation:
; a lane-wise smax of v0 and v1 first, then one across-lanes smaxv on the
; 8h result.
define i16 @smaxv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: smaxv_v16i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    smaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; Signed-max reduction of <2 x i32>. Both llc runs share one expectation:
; a pairwise smaxp of v0 with itself rather than an across-lanes smaxv.
define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; Signed-max reduction of <3 x i32>. Both llc runs share one expectation:
; lane 3 is filled with 0x80000000 (the smallest signed i32, so it cannot win
; the max) and the padded vector is reduced with a 4-lane smaxv.
define i32 @smaxv_v3i32(<3 x i32> %a) {
; CHECK-LABEL: smaxv_v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    smaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; Signed-max reduction of <4 x i32>. Both llc runs share one expectation:
; a single across-lanes smaxv on the 4s vector, then a move of the result to w0.
define i32 @smaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: smaxv_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; Signed-max reduction of <8 x i32>. Both llc runs share one expectation:
; a lane-wise smax of v0 and v1 first, then one across-lanes smaxv on the
; 4s result.
define i32 @smaxv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: smaxv_v8i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    smaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; Signed-max reduction of <2 x i64>. Neither expectation uses an across-lanes
; instruction. SelectionDAG stays in vector registers (ext to bring lane 1
; down, cmgt, bif); GlobalISel moves both lanes to x8/x9, compares them and
; selects with fcsel on the gt condition.
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: smaxv_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, gt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; Signed-max reduction of <3 x i64>. Both selectors are expected to build
; v0 = {d0, d1} and v2 = {d2, 0x8000000000000000}, take the lane-wise signed
; max with cmgt/bif, and then reduce the remaining pair: SelectionDAG with
; ext/cmgt/bif, GlobalISel with a scalar cmp and fcsel.
define i64 @smaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-LABEL: smaxv_v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    mov v2.d[1], x8
; CHECK-SD-NEXT:    cmgt v1.2d, v0.2d, v2.2d
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v2.d[1], x8
; CHECK-GI-NEXT:    cmgt v1.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, gt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; Signed-max reduction of <4 x i64>. Both selectors are expected to first take
; the lane-wise signed max of v0 and v1 with cmgt/bif. The remaining pair is
; then reduced with ext/cmgt/bif (SelectionDAG) or a scalar cmp and fcsel
; (GlobalISel).
define i64 @smaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: smaxv_v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmgt d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, gt
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; Signed-max reduction of <2 x i128>, expected to be done entirely in general
; purpose registers. SelectionDAG uses a cmp/sbcs pair and two csel on lt;
; GlobalISel compares x1/x3 (signed, gt) and x0/x2 (unsigned, hi) separately,
; picks the x0/x2 result when x1 == x3, and then selects both result registers.
define i128 @smaxv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: smaxv_v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmp x2, x0
; CHECK-SD-NEXT:    sbcs xzr, x3, x1
; CHECK-SD-NEXT:    csel x0, x0, x2, lt
; CHECK-SD-NEXT:    csel x1, x1, x3, lt
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: smaxv_v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    cset w8, gt
; CHECK-GI-NEXT:    cmp x0, x2
; CHECK-GI-NEXT:    cset w9, hi
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    csel w8, w9, w8, eq
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    csel x0, x0, x2, ne
; CHECK-GI-NEXT:    csel x1, x1, x3, ne
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.smax.v2i128(<2 x i128> %a)
  ret i128 %arg1
}

; Unsigned-min reduction of <2 x i8>. SelectionDAG is expected to mask each
; 32-bit lane down to its low byte (movi/and) and use a pairwise uminp;
; GlobalISel is expected to compare the two lanes as zero-extended bytes in
; scalar registers and select with fcsel on the lo condition.
define i8 @uminv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: uminv_v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    and w8, w8, #0xff
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, uxtb
; CHECK-GI-NEXT:    fcsel s0, s0, s1, lo
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <3 x i8>; the elements arrive in w0, w1 and w2.
; SelectionDAG is expected to start from a vector holding 0x00ff in every
; 16-bit lane, insert the three inputs into lanes 0-2, clear the high byte of
; each lane and use a 4-lane uminv. GlobalISel is expected to stay scalar with
; two compare/csel steps on the lo condition.
define i8 @uminv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: uminv_v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d0, #0xff00ff00ff00ff
; CHECK-SD-NEXT:    mov v0.h[0], w0
; CHECK-SD-NEXT:    mov v0.h[1], w1
; CHECK-SD-NEXT:    mov v0.h[2], w2
; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT:    uminv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xff
; CHECK-GI-NEXT:    cmp w8, w1, uxtb
; CHECK-GI-NEXT:    csel w8, w0, w1, lo
; CHECK-GI-NEXT:    and w9, w8, #0xff
; CHECK-GI-NEXT:    cmp w9, w2, uxtb
; CHECK-GI-NEXT:    csel w0, w8, w2, lo
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <4 x i8>. SelectionDAG is expected to clear the
; high byte of each 16-bit lane (bic) and use a 4-lane uminv. GlobalISel is
; expected to extract all four lanes with umov and reduce them with three
; scalar compare/csel steps on the lo condition.
define i8 @uminv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: uminv_v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT:    uminv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    umov w8, v0.h[0]
; CHECK-GI-NEXT:    umov w9, v0.h[1]
; CHECK-GI-NEXT:    umov w10, v0.h[2]
; CHECK-GI-NEXT:    umov w11, v0.h[3]
; CHECK-GI-NEXT:    and w12, w8, #0xff
; CHECK-GI-NEXT:    cmp w12, w9, uxtb
; CHECK-GI-NEXT:    and w12, w10, #0xff
; CHECK-GI-NEXT:    csel w8, w8, w9, lo
; CHECK-GI-NEXT:    cmp w12, w11, uxtb
; CHECK-GI-NEXT:    csel w9, w10, w11, lo
; CHECK-GI-NEXT:    and w10, w8, #0xff
; CHECK-GI-NEXT:    cmp w10, w9, uxtb
; CHECK-GI-NEXT:    csel w0, w8, w9, lo
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <8 x i8>. Both llc runs share one expectation:
; a single across-lanes uminv on the 8b vector, then a move of the result to w0.
define i8 @uminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uminv_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminv b0, v0.8b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <16 x i8>. Both llc runs share one expectation:
; a single across-lanes uminv on the 16b vector, then a move of the result to
; w0.
define i8 @uminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uminv_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <32 x i8>. Both llc runs share one expectation:
; a lane-wise umin of v0 and v1 first, then one across-lanes uminv on the
; 16b result.
define i8 @uminv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: uminv_v32i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umin v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; Unsigned-min reduction of <2 x i16>. SelectionDAG is expected to mask each
; 32-bit lane down to its low 16 bits (movi/and) and use a pairwise uminp;
; GlobalISel is expected to compare the two lanes as zero-extended halfwords
; in scalar registers and select with fcsel on the lo condition.
define i16 @uminv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: uminv_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    and w8, w8, #0xffff
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, uxth
; CHECK-GI-NEXT:    fcsel s0, s0, s1, lo
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; Unsigned-min reduction of <3 x i16>. Both selectors are expected to fill
; lane 3 with all-ones in the low 16 bits (the largest unsigned i16, so it
; cannot win the min) and then use a 4-lane uminv; they differ only in the
; constant materialised in w8 (#-1 for SelectionDAG, #65535 for GlobalISel).
define i16 @uminv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: uminv_v3i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov w8, #-1 // =0xffffffff
; CHECK-SD-NEXT:    mov v0.h[3], w8
; CHECK-SD-NEXT:    uminv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v3i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, #65535 // =0xffff
; CHECK-GI-NEXT:    mov v0.h[3], w8
; CHECK-GI-NEXT:    uminv h0, v0.4h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; Unsigned-min reduction of <4 x i16>. Both llc runs share one expectation:
; a single across-lanes uminv on the 4h vector, then a move of the result to w0.
define i16 @uminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uminv_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; Unsigned-min reduction of <8 x i16>. Both llc runs share one expectation:
; a single across-lanes uminv on the 8h vector, then a move of the result to w0.
define i16 @uminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uminv_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; Unsigned-min reduction of <16 x i16>. Both llc runs share one expectation:
; a lane-wise umin of v0 and v1 first, then one across-lanes uminv on the
; 8h result.
define i16 @uminv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: uminv_v16i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; Unsigned-min reduction of <2 x i32>. Both llc runs share one expectation:
; a pairwise uminp of v0 with itself rather than an across-lanes uminv.
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; Unsigned-min reduction of <3 x i32>. Both llc runs share one expectation:
; lane 3 is filled with 0xffffffff (the largest unsigned i32, so it cannot win
; the min) and the padded vector is reduced with a 4-lane uminv.
define i32 @uminv_v3i32(<3 x i32> %a) {
; CHECK-LABEL: uminv_v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; Unsigned-min reduction of <4 x i32>. Both llc runs share one expectation:
; a single across-lanes uminv on the 4s vector, then a move of the result to w0.
define i32 @uminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uminv_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; Unsigned-min reduction of <8 x i32>. Both llc runs share one expectation:
; a lane-wise umin of v0 and v1 first, then one across-lanes uminv on the
; 4s result.
define i32 @uminv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: uminv_v8i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; Unsigned-min reduction of <2 x i64>. Neither expectation uses an
; across-lanes instruction. SelectionDAG stays in vector registers (ext to
; bring lane 1 down, cmhi with the operands swapped, bif); GlobalISel moves
; both lanes to x8/x9, compares them and selects with fcsel on the lo
; condition.
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: uminv_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmhi d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lo
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; Unsigned-min reduction of <3 x i64>. Both selectors are expected to build
; v0 = {d0, d1} and v2 = {d2, 0xffffffffffffffff}, take the lane-wise unsigned
; min with cmhi/bif, and then reduce the remaining pair: SelectionDAG with
; ext/cmhi/bif, GlobalISel with a scalar cmp and fcsel.
define i64 @uminv_v3i64(<3 x i64> %a) {
; CHECK-SD-LABEL: uminv_v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov x8, #-1 // =0xffffffffffffffff
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    mov v2.d[1], x8
; CHECK-SD-NEXT:    cmhi v1.2d, v2.2d, v0.2d
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmhi d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    mov x8, #-1 // =0xffffffffffffffff
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v2.d[1], x8
; CHECK-GI-NEXT:    cmhi v1.2d, v2.2d, v0.2d
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lo
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; Unsigned-min reduction of <4 x i64>. Both selectors are expected to first
; take the lane-wise unsigned min of v0 and v1 with cmhi/bif. The remaining
; pair is then reduced with ext/cmhi/bif (SelectionDAG) or a scalar cmp and
; fcsel (GlobalISel).
define i64 @uminv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: uminv_v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmhi v2.2d, v1.2d, v0.2d
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmhi d2, d1, d0
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmhi v2.2d, v1.2d, v0.2d
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, lo
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; Unsigned-min reduction of <2 x i128>, expected to be done entirely in
; general purpose registers. SelectionDAG uses a cmp/sbcs pair and two csel on
; lo; GlobalISel compares x1/x3 and x0/x2 separately (both unsigned, lo),
; picks the x0/x2 result when x1 == x3, and then selects both result registers.
define i128 @uminv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: uminv_v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmp x0, x2
; CHECK-SD-NEXT:    sbcs xzr, x1, x3
; CHECK-SD-NEXT:    csel x0, x0, x2, lo
; CHECK-SD-NEXT:    csel x1, x1, x3, lo
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uminv_v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    cset w8, lo
; CHECK-GI-NEXT:    cmp x0, x2
; CHECK-GI-NEXT:    cset w9, lo
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    csel w8, w9, w8, eq
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    csel x0, x0, x2, ne
; CHECK-GI-NEXT:    csel x1, x1, x3, ne
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.umin.v2i128(<2 x i128> %a)
  ret i128 %arg1
}

; Unsigned-max reduction of <2 x i8>. SelectionDAG is expected to mask each
; 32-bit lane down to its low byte (movi/and) and use a pairwise umaxp;
; GlobalISel is expected to compare the two lanes as zero-extended bytes in
; scalar registers and select with fcsel on the hi condition.
define i8 @umaxv_v2i8(<2 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    and w8, w8, #0xff
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, uxtb
; CHECK-GI-NEXT:    fcsel s0, s0, s1, hi
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <3 x i8>; the elements arrive in w0, w1 and w2.
; SelectionDAG is expected to start from an all-zero vector, insert the three
; inputs into 16-bit lanes 0-2, clear the high byte of each lane and use a
; 4-lane umaxv. GlobalISel is expected to stay scalar with two compare/csel
; steps on the hi condition.
define i8 @umaxv_v3i8(<3 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    mov v0.h[0], w0
; CHECK-SD-NEXT:    mov v0.h[1], w1
; CHECK-SD-NEXT:    mov v0.h[2], w2
; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT:    umaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xff
; CHECK-GI-NEXT:    cmp w8, w1, uxtb
; CHECK-GI-NEXT:    csel w8, w0, w1, hi
; CHECK-GI-NEXT:    and w9, w8, #0xff
; CHECK-GI-NEXT:    cmp w9, w2, uxtb
; CHECK-GI-NEXT:    csel w0, w8, w2, hi
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <4 x i8>. SelectionDAG is expected to clear the
; high byte of each 16-bit lane (bic) and use a 4-lane umaxv. GlobalISel is
; expected to extract all four lanes with umov and reduce them with three
; scalar compare/csel steps on the hi condition.
define i8 @umaxv_v4i8(<4 x i8> %a) {
; CHECK-SD-LABEL: umaxv_v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT:    umaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    umov w8, v0.h[0]
; CHECK-GI-NEXT:    umov w9, v0.h[1]
; CHECK-GI-NEXT:    umov w10, v0.h[2]
; CHECK-GI-NEXT:    umov w11, v0.h[3]
; CHECK-GI-NEXT:    and w12, w8, #0xff
; CHECK-GI-NEXT:    cmp w12, w9, uxtb
; CHECK-GI-NEXT:    and w12, w10, #0xff
; CHECK-GI-NEXT:    csel w8, w8, w9, hi
; CHECK-GI-NEXT:    cmp w12, w11, uxtb
; CHECK-GI-NEXT:    csel w9, w10, w11, hi
; CHECK-GI-NEXT:    and w10, w8, #0xff
; CHECK-GI-NEXT:    cmp w10, w9, uxtb
; CHECK-GI-NEXT:    csel w0, w8, w9, hi
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <8 x i8>. Both llc runs share one expectation:
; a single across-lanes umaxv on the 8b vector, then a move of the result to w0.
define i8 @umaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: umaxv_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <16 x i8>. Both llc runs share one expectation:
; a single across-lanes umaxv on the 16b vector, then a move of the result to
; w0.
define i8 @umaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: umaxv_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <32 x i8>. Both llc runs share one expectation:
; a lane-wise umax of v0 and v1 first, then one across-lanes umaxv on the
; 16b result.
define i8 @umaxv_v32i8(<32 x i8> %a) {
; CHECK-LABEL: umaxv_v32i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umax v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %a)
  ret i8 %arg1
}

; Unsigned-max reduction of <2 x i16>. SelectionDAG is expected to mask each
; 32-bit lane down to its low 16 bits (movi/and) and use a pairwise umaxp;
; GlobalISel is expected to compare the two lanes as zero-extended halfwords
; in scalar registers and select with fcsel on the hi condition.
define i16 @umaxv_v2i16(<2 x i16> %a) {
; CHECK-SD-LABEL: umaxv_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    and w8, w8, #0xffff
; CHECK-GI-NEXT:    fmov w9, s1
; CHECK-GI-NEXT:    cmp w8, w9, uxth
; CHECK-GI-NEXT:    fcsel s0, s0, s1, hi
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> %a)
  ret i16 %arg1
}

; Unsigned-max reduction of <3 x i16>. Both selectors are expected to fill
; lane 3 with zero (which cannot win an unsigned max) and then use a 4-lane
; umaxv. SelectionDAG inserts wzr directly, while GlobalISel first
; materialises 0 in w8.
define i16 @umaxv_v3i16(<3 x i16> %a) {
; CHECK-SD-LABEL: umaxv_v3i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov v0.h[3], wzr
; CHECK-SD-NEXT:    umaxv h0, v0.4h
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v3i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, #0 // =0x0
; CHECK-GI-NEXT:    mov v0.h[3], w8
; CHECK-GI-NEXT:    umaxv h0, v0.4h
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
  ret i16 %arg1
}

; Unsigned-max reduction of <4 x i16>. Both llc runs share one expectation:
; a single across-lanes umaxv on the 4h vector, then a move of the result to w0.
define i16 @umaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: umaxv_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
  ret i16 %arg1
}

; Unsigned-max reduction of <8 x i16>. Both llc runs share one expectation:
; a single across-lanes umaxv on the 8h vector, then a move of the result to w0.
define i16 @umaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: umaxv_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
  ret i16 %arg1
}

; Unsigned-max reduction of <16 x i16>. Both llc runs share one expectation:
; a lane-wise umax of v0 and v1 first, then one across-lanes umaxv on the
; 8h result.
define i16 @umaxv_v16i16(<16 x i16> %a) {
; CHECK-LABEL: umaxv_v16i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umax v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %a)
  ret i16 %arg1
}

; Unsigned-max reduction of <2 x i32>. Both llc runs share one expectation:
; a pairwise umaxp of v0 with itself rather than an across-lanes umaxv.
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
  ret i32 %arg1
}

; Unsigned-max reduction of <3 x i32>. Both llc runs share one expectation:
; lane 3 is zeroed from wzr (zero cannot win an unsigned max) and the padded
; vector is reduced with a 4-lane umaxv.
define i32 @umaxv_v3i32(<3 x i32> %a) {
; CHECK-LABEL: umaxv_v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.s[3], wzr
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
  ret i32 %arg1
}

; Unsigned-max reduction of <4 x i32>. Both llc runs share one expectation:
; a single across-lanes umaxv on the 4s vector, then a move of the result to w0.
define i32 @umaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: umaxv_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
  ret i32 %arg1
}

; Unsigned-max reduction of <8 x i32>. Both llc runs share one expectation:
; a lane-wise umax of v0 and v1 first, then one across-lanes umaxv on the
; 4s result.
define i32 @umaxv_v8i32(<8 x i32> %a) {
; CHECK-LABEL: umaxv_v8i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %arg1 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %a)
  ret i32 %arg1
}

; Unsigned-max reduction of <2 x i64>. Neither expectation uses an
; across-lanes instruction. SelectionDAG stays in vector registers (ext to
; bring lane 1 down, cmhi, bif); GlobalISel moves both lanes to x8/x9,
; compares them and selects with fcsel on the hi condition.
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-SD-LABEL: umaxv_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmhi d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, hi
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
  ret i64 %arg1
}

; Unsigned-max reduction of <3 x i64>, padded with zero from xzr in both
; expectations. GlobalISel pads v2 in place, takes the lane-wise max with
; cmhi/bif and finishes with a scalar cmp and fcsel. SelectionDAG pads a copy
; of v2 (v3), compares v0 against it, and applies ext/and to the compare mask
; and v1 before its final cmhi/bif.
define i64 @umaxv_v3i64(<3 x i64> %a) {
; CHECK-SD-LABEL: umaxv_v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    mov v3.16b, v2.16b
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    mov v3.d[1], xzr
; CHECK-SD-NEXT:    cmhi v3.2d, v0.2d, v3.2d
; CHECK-SD-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v3.16b
; CHECK-SD-NEXT:    and v1.8b, v1.8b, v4.8b
; CHECK-SD-NEXT:    cmhi d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v2.d[1], xzr
; CHECK-GI-NEXT:    cmhi v1.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, hi
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v3i64(<3 x i64> %a)
  ret i64 %arg1
}

; Unsigned-max reduction of <4 x i64>. Both selectors are expected to first
; take the lane-wise unsigned max of v0 and v1 with cmhi/bif. The remaining
; pair is then reduced with ext/cmhi/bif (SelectionDAG) or a scalar cmp and
; fcsel (GlobalISel).
define i64 @umaxv_v4i64(<4 x i64> %a) {
; CHECK-SD-LABEL: umaxv_v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmhi v2.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT:    cmhi d2, d0, d1
; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmhi v2.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    cmp x8, x9
; CHECK-GI-NEXT:    fcsel d0, d0, d1, hi
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %a)
  ret i64 %arg1
}

; Unsigned-max reduction of <2 x i128>, expected to be done entirely in
; general purpose registers. SelectionDAG uses a cmp/sbcs pair (operands
; swapped) and two csel on lo; GlobalISel compares x1/x3 and x0/x2 separately
; (both unsigned, hi), picks the x0/x2 result when x1 == x3, and then selects
; both result registers.
define i128 @umaxv_v2i128(<2 x i128> %a) {
; CHECK-SD-LABEL: umaxv_v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    cmp x2, x0
; CHECK-SD-NEXT:    sbcs xzr, x3, x1
; CHECK-SD-NEXT:    csel x0, x0, x2, lo
; CHECK-SD-NEXT:    csel x1, x1, x3, lo
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: umaxv_v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    cset w8, hi
; CHECK-GI-NEXT:    cmp x0, x2
; CHECK-GI-NEXT:    cset w9, hi
; CHECK-GI-NEXT:    cmp x1, x3
; CHECK-GI-NEXT:    csel w8, w9, w8, eq
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    csel x0, x0, x2, ne
; CHECK-GI-NEXT:    csel x1, x1, x3, ne
; CHECK-GI-NEXT:    ret
entry:
  %arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
  ret i128 %arg1
}