; llvm/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"

; Unsigned-min clamp over 8 adjacent i16 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..7), compares it with
; `icmp ult` against the constant 16383, keeps the loaded value when the
; compare is true and uses 16383 otherwise, then stores the result in place.
; The assertions expect the eight lanes to be merged into a single
; <8 x i16> load / icmp ult / select / store sequence.
; %x is not referenced by the body.
define void @select_umin_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ult i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ult i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ult i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ult i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ult i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ult i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ult i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ult i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Unsigned-min clamp over 4 adjacent i32 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..3), compares it with
; `icmp ult` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp ult / select / store.
; %x is not referenced by the body.
define void @select_umin_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ult i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ult i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; Compare/select over 4 adjacent i32 elements where the predicate alternates
; between lanes: lanes 0 and 2 use `icmp ult`, lanes 1 and 3 use `icmp ugt`,
; all against the constant 16383. Every lane selects the loaded value when its
; compare is true and 16383 otherwise, and stores the result in place.
; The assertions expect the code to still be vectorized: one <4 x i32> load,
; a vector `icmp ult` and a vector `icmp ugt` on the same data, a
; shufflevector with mask <0, 5, 2, 7> that takes lanes 0 and 2 from the ult
; result and lanes 1 and 3 from the ugt result, then one select and one store.
; %x is not referenced by the body.
define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ult i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ult i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp ult` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp ult / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_umin_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ult i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ult i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}


; Unsigned-min clamp over 8 adjacent i16 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..7), compares it
; with `icmp ule` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp ule / select / store.
; %x is not referenced by the body.
define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_ule_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ule i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ule i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ule i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ule i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ule i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ule i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ule i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ule i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Unsigned-min clamp over 4 adjacent i32 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..3), compares it
; with `icmp ule` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp ule / select / store.
; %x is not referenced by the body.
define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_ule_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ule i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ule i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ule i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ule i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umin, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp ule` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp ule / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umin_ule_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ule i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ule i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Signed-min clamp over 8 adjacent i16 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..7), compares it with
; `icmp slt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp slt / select / store.
; %x is not referenced by the body.
define void @select_smin_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp slt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp slt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp slt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp slt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp slt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp slt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp slt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp slt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Signed-min clamp over 4 adjacent i32 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..3), compares it with
; `icmp slt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp slt / select / store.
; %x is not referenced by the body.
define void @select_smin_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp slt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp slt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp slt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp slt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp slt` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp slt / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_smin_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp slt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp slt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Signed-min clamp over 8 adjacent i16 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..7), compares it
; with `icmp sle` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp sle / select / store.
; %x is not referenced by the body.
define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_sle_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp sle i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp sle i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp sle i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp sle i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp sle i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp sle i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp sle i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp sle i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Signed-min clamp over 4 adjacent i32 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..3), compares it
; with `icmp sle` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp sle / select / store.
; %x is not referenced by the body.
define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_sle_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp sle i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp sle i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp sle i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp sle i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smin, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp sle` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp sle / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smin_sle_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp sle i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp sle i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}
; Unsigned-max clamp over 8 adjacent i16 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..7), compares it with
; `icmp ugt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp ugt / select / store.
; %x is not referenced by the body.
define void @select_umax_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp ugt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp ugt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp ugt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp ugt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp ugt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp ugt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp ugt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp ugt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Unsigned-max clamp over 4 adjacent i32 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..3), compares it with
; `icmp ugt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp ugt / select / store.
; %x is not referenced by the body.
define void @select_umax_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp ugt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp ugt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp ugt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp ugt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp ugt` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp ugt / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_umax_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp ugt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp ugt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Unsigned-max clamp over 8 adjacent i16 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..7), compares it
; with `icmp uge` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp uge / select / store.
; %x is not referenced by the body.
define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_uge_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp uge i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp uge i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp uge i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp uge i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp uge i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp uge i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp uge i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp uge i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Unsigned-max clamp over 4 adjacent i32 elements using the non-strict
; predicate. Each lane loads the element at %ptr + i (i = 0..3), compares it
; with `icmp uge` against 16383, keeps the loaded value when the compare is
; true and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp uge / select / store.
; %x is not referenced by the body.
define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_uge_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp uge i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp uge i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp uge i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp uge i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of umax, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements at %ptr are each compared with `icmp uge` against
; 16383 and replaced by 16383 when the compare is false. The assertions expect
; a single <2 x i64> load / icmp uge / select / store.
; The scalar loads carry no explicit alignment and the scalar stores are only
; `align 4`; the expected vector load is `align 8` while the expected vector
; store stays `align 4`. %x is not referenced by the body.
define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_umax_uge_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp uge i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp uge i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}

; Signed-max clamp over 8 adjacent i16 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..7), compares it with
; `icmp sgt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <8 x i16> load / icmp sgt / select / store.
; %x is not referenced by the body.
define void @select_smax_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp sgt i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp sgt i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp sgt i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp sgt i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp sgt i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp sgt i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp sgt i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp sgt i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Signed-max clamp over 4 adjacent i32 elements, written as scalar code.
; Each lane loads the element at %ptr + i (i = 0..3), compares it with
; `icmp sgt` against 16383, keeps the loaded value when the compare is true
; and uses 16383 otherwise, then stores the result in place.
; The assertions expect one <4 x i32> load / icmp sgt / select / store.
; %x is not referenced by the body.
define void @select_smax_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp sgt i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp sgt i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp sgt i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp sgt i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared against
; 16383 with `icmp sgt`, replaced by 16383 when the compare is false, and
; stored back in place. The assertions below expect a single <2 x i64>
; load / icmp / select / store sequence.
; The scalar loads carry no explicit alignment while the scalar stores specify
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
define void @select_smax_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Element 0 is accessed through %ptr directly (no GEP).
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp sgt i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  ; Element 1 repeats the same chain one i64 past %ptr.
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp sgt i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}


; Signed-max pattern written with the non-strict `icmp sge` predicate on eight
; adjacent i16 elements starting at %ptr: each element is loaded, compared
; against 16383, replaced by 16383 when the compare is false, and stored back
; to the address it was loaded from.
; The assertions below expect the eight scalar chains to be replaced by a
; single <8 x i16> load / icmp sge / select / store sequence.
; The %x argument is not referenced by the body.
define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_sge_8xi16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
; CHECK-NEXT:    ret void
;
entry:
  ; Element 0 is accessed through %ptr directly (no GEP).
  %l.0 = load i16, ptr %ptr
  %cmp.0 = icmp sge i16 %l.0, 16383
  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
  store i16 %s.0, ptr %ptr, align 2

  ; Elements 1-7 repeat the same load/compare/select/store chain at
  ; consecutive i16 offsets from %ptr.
  %gep.1 = getelementptr inbounds i16, ptr %ptr, i16 1
  %l.1 = load i16, ptr %gep.1
  %cmp.1 = icmp sge i16 %l.1, 16383
  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
  store i16 %s.1, ptr %gep.1, align 2

  %gep.2 = getelementptr inbounds i16, ptr %ptr, i16 2
  %l.2 = load i16, ptr %gep.2
  %cmp.2 = icmp sge i16 %l.2, 16383
  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
  store i16 %s.2, ptr %gep.2, align 2

  %gep.3 = getelementptr inbounds i16, ptr %ptr, i16 3
  %l.3 = load i16, ptr %gep.3
  %cmp.3 = icmp sge i16 %l.3, 16383
  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
  store i16 %s.3, ptr %gep.3, align 2

  %gep.4 = getelementptr inbounds i16, ptr %ptr, i16 4
  %l.4 = load i16, ptr %gep.4
  %cmp.4 = icmp sge i16 %l.4, 16383
  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
  store i16 %s.4, ptr %gep.4, align 2

  %gep.5 = getelementptr inbounds i16, ptr %ptr, i16 5
  %l.5 = load i16, ptr %gep.5
  %cmp.5 = icmp sge i16 %l.5, 16383
  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
  store i16 %s.5, ptr %gep.5, align 2

  %gep.6 = getelementptr inbounds i16, ptr %ptr, i16 6
  %l.6 = load i16, ptr %gep.6
  %cmp.6 = icmp sge i16 %l.6, 16383
  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
  store i16 %s.6, ptr %gep.6, align 2

  %gep.7 = getelementptr inbounds i16, ptr %ptr, i16 7
  %l.7 = load i16, ptr %gep.7
  %cmp.7 = icmp sge i16 %l.7, 16383
  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
  store i16 %s.7, ptr %gep.7, align 2
  ret void
}

; Signed-max pattern written with the non-strict `icmp sge` predicate on four
; adjacent i32 elements starting at %ptr: each element is loaded, compared
; against 16383, replaced by 16383 when the compare is false, and stored back
; to the address it was loaded from.
; The assertions below expect the four scalar chains to be replaced by a single
; <4 x i32> load / icmp sge / select / store sequence.
; The %x argument is not referenced by the body.
define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_sge_4xi32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Element 0 is accessed through %ptr directly (no GEP).
  %l.0 = load i32, ptr %ptr
  %cmp.0 = icmp sge i32 %l.0, 16383
  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
  store i32 %s.0, ptr %ptr, align 4

  ; Elements 1-3 repeat the same load/compare/select/store chain at
  ; consecutive i32 offsets from %ptr.
  %gep.1 = getelementptr inbounds i32, ptr %ptr, i32 1
  %l.1 = load i32, ptr %gep.1
  %cmp.1 = icmp sge i32 %l.1, 16383
  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
  store i32 %s.1, ptr %gep.1, align 4

  %gep.2 = getelementptr inbounds i32, ptr %ptr, i32 2
  %l.2 = load i32, ptr %gep.2
  %cmp.2 = icmp sge i32 %l.2, 16383
  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
  store i32 %s.2, ptr %gep.2, align 4

  %gep.3 = getelementptr inbounds i32, ptr %ptr, i32 3
  %l.3 = load i32, ptr %gep.3
  %cmp.3 = icmp sge i32 %l.3, 16383
  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
  store i32 %s.3, ptr %gep.3, align 4

  ret void
}

; There is no <2 x i64> version of smax, but we can efficiently lower
; compare/select pairs with uniform predicates.
;
; Two adjacent i64 elements starting at %ptr are each loaded, compared against
; 16383 with the non-strict `icmp sge` predicate, replaced by 16383 when the
; compare is false, and stored back in place. The assertions below expect a
; single <2 x i64> load / icmp sge / select / store sequence.
; The scalar loads carry no explicit alignment while the scalar stores specify
; `align 4`; the expected vector load is `align 8` and the expected vector
; store is `align 4`.
; The %x argument is not referenced by the body.
define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) {
; CHECK-LABEL: @select_smax_sge_2xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; Element 0 is accessed through %ptr directly (no GEP).
  %l.0 = load i64, ptr %ptr
  %cmp.0 = icmp sge i64 %l.0, 16383
  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
  store i64 %s.0, ptr %ptr, align 4

  ; Element 1 repeats the same chain one i64 past %ptr.
  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 1
  %l.1 = load i64, ptr %gep.1
  %cmp.1 = icmp sge i64 %l.1, 16383
  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
  store i64 %s.1, ptr %gep.1, align 4

  ret void
}