llvm/llvm/test/CodeGen/X86/vselect-pcmp.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop       | FileCheck %s --check-prefixes=CHECK,XOP

; The condition vector for BLENDV* only cares about the sign bit of each element.
; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.

; Test 128-bit vectors for all legal element types.

; Per i8 lane: return %x where %mask is negative (sign bit set), else %y.
; Every run line shares the same expectation: a lone vpblendvb keyed directly
; on %mask, with no compare against zero left in the output.
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; CHECK-LABEL: signbit_sel_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %tr = icmp slt <16 x i8> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %z
}

; 16-bit elements: no BLENDV* form works at i16 granularity, so the compare is currently kept.

; Per i16 lane: return %x where %mask is negative, else %y.
; All configurations are expected to keep the compare against zero (vpcmpgtw,
; or vpcomltw with XOP) ahead of the blend. The blend itself is vpblendvb,
; except on AVX512VL where vpternlogq is expected.
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX12-LABEL: signbit_sel_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX12-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpternlogq $226, %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomltw %xmm3, %xmm2, %xmm2
; XOP-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i16> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %z
}

; Per i32 lane: return %x where %mask is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single vblendvps with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtd into %k1 followed by
; a masked vpblendmd; without VL the operands are widened to zmm.
define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %z
}

; Per i64 lane: return %x where %mask is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single vblendvpd with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtq into %k1 followed by
; a masked vpblendmq; without VL the operands are widened to zmm.
define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %z
}

; Float data with an integer mask: per lane, return %x where the i32 %mask
; element is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single vblendvps with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtd into %k1 followed by
; a masked vblendmps; without VL the operands are widened to zmm.
define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
  ret <4 x float> %z
}

; Double data with an integer mask: per lane, return %x where the i64 %mask
; element is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single vblendvpd with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtq into %k1 followed by
; a masked vblendmpd; without VL the operands are widened to zmm.
define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v2f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %tr = icmp slt <2 x i64> %mask, zeroinitializer
  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
  ret <2 x double> %z
}

; Test 256-bit vectors to see differences between AVX1 and AVX2.

; 256-bit i8 version: per lane, return %x where %mask is negative, else %y.
; The AVX2 run and both AVX512 runs expect a lone ymm vpblendvb with no
; compare. The AVX1 and XOP runs expect the mask to be split into 128-bit
; halves, each half compared against zero, and the halves rejoined; the select
; is then done with and/andn/or on AVX1 or vpcmov on XOP.
define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpcomltb %xmm4, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <32 x i8> %mask, zeroinitializer
  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
  ret <32 x i8> %z
}

; 16-bit elements again: the compare is currently kept for 256-bit vectors as well.

; 256-bit i16 version: per lane, return %x where %mask is negative, else %y.
; Every configuration is expected to keep a compare against zero. The AVX1 and
; XOP runs do it per 128-bit half and then select with and/andn/or (AVX1) or
; vpcmov (XOP). The AVX2 and AVX512F runs expect a ymm vpcmpgtw feeding
; vpblendvb, and the AVX512VL run expects vpcmpgtw feeding vpternlogq.
define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpternlogq $226, %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpcomltw %xmm4, %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <16 x i16> %mask, zeroinitializer
  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %z
}

; 256-bit i32 version: per lane, return %x where %mask is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single ymm vblendvps with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtd into %k1 followed by
; a masked vpblendmd; without VL the operands are widened to zmm.
define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i32> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %z
}

; 256-bit i64 version: per lane, return %x where %mask is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single ymm vblendvpd with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtq into %k1 followed by
; a masked vpblendmq; without VL the operands are widened to zmm.
define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %z
}

; 256-bit double data with an i64 mask: per lane, return %x where the %mask
; element is negative, else %y.
; The AVX1, AVX2 and XOP runs expect a single ymm vblendvpd with the compare
; removed. The AVX512F and AVX512VL runs expect vpcmpgtq into %k1 followed by
; a masked vblendmpd; without VL the operands are widened to zmm.
define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i64> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a condition with a different type than the select operands.

; The mask elements (i32) are narrower than the selected elements (double):
; per lane, return %x where the %mask element is negative, else %y.
; The AVX1 and XOP runs expect the mask to be sign-extended to 64 bits in two
; 128-bit halves (vpmovsxdq) and rejoined before a ymm vblendvpd; the AVX2 run
; expects one ymm vpmovsxdq before the blend. The AVX512F and AVX512VL runs
; expect vpcmpgtd on the narrow mask into %k1, then a masked vblendmpd.
define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f64_small_mask:
; XOP:       # %bb.0:
; XOP-NEXT:    vpmovsxdq %xmm2, %xmm3
; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; XOP-NEXT:    vpmovsxdq %xmm2, %xmm2
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT:    retq
  %tr = icmp slt <4 x i32> %mask, zeroinitializer
  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
  ret <4 x double> %z
}

; Try a 512-bit vector to make sure AVX-512 is handled as expected.

; 512-bit double data with an i64 mask: per lane, return %x where the %mask
; element is negative, else %y.
; The AVX1, AVX2 and XOP runs expect two ymm vblendvpd instructions (one per
; 256-bit half) and no compare. Both AVX512 runs expect a zmm vpcmpgtq into
; %k1 followed by a masked vblendmpd.
define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX12-NEXT:    retq
;
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v8f64:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; XOP-NEXT:    retq
  %tr = icmp slt <8 x i64> %mask, zeroinitializer
  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
  ret <8 x double> %z
}

; If we have a floating-point compare:
; (1) Don't die.
; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.

; Floating-point compare variant: per lane, return %x where %x is
; ordered-less-than zero, else %y. The body never reads %mask; the condition
; is computed from %x itself.
; Every configuration is expected to keep the compare (vcmpltps) ahead of the
; blend: vblendvps for the AVX1, AVX2 and XOP runs, and a %k1-masked vblendmps
; for the AVX512F and AVX512VL runs.
; NOTE(review): attribute group #0 is defined outside this excerpt - confirm
; its contents before relying on any fast-math assumption here.
define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
; AVX12-LABEL: signbit_sel_v4f32_fcmp:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: signbit_sel_v4f32_fcmp:
; XOP:       # %bb.0:
; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT:    retq
  %cmp = fcmp olt <4 x float> %x, zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
  ret <4 x float> %sel
}

; Condition is a single-bit test: per i64 lane, return %y where bit 0 of %x is
; clear ((%x & 1) == 0), else %z.
; The AVX1 and AVX2 runs expect the tested bit to be shifted into the sign
; position (vpsllq $63) and fed straight to vblendvpd, with no and/compare.
; The AVX512F and AVX512VL runs expect vptestnmq against a broadcast constant
; into %k1, then a masked vpblendmq. The XOP run expects the same left shift
; followed by a vpshaq on each 128-bit half before the vblendvpd.
define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpsllq $63, %xmm3, %xmm3
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpsllq $63, %xmm0, %xmm0
; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}

; Per i32 lane: return %y where bit 0 of %x is clear ((%x & 1) == 0), else %z.
; The AVX1 and AVX2 runs expect the tested bit to be shifted into the sign
; position (vpslld $31) and fed straight to vblendvps. The AVX512F and
; AVX512VL runs expect vptestnmd against a broadcast constant into %k1, then a
; masked vpblendmd. The XOP run expects the and to be kept (vpand), followed
; by vpcomneqd against zero and vblendvps.
define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

; Per i16 lane: return %y where bit 0 of %x is clear ((%x & 1) == 0), else %z.
; The AVX1, AVX2 and XOP runs expect the tested bit to be spread across the
; whole lane with a vpsllw $15 / vpsraw $15 pair (per 128-bit half on AVX1 and
; XOP), then a select via and/andn/or (AVX1), vpblendvb (AVX2) or vpcmov (XOP).
; The AVX512F and AVX512VL runs expect the and to be kept, followed by vpcmpeqw
; against zero and vpblendvb (AVX512F) or vpternlogq (AVX512VL).
define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsllw $15, %xmm0, %xmm3
; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpsllw $15, %xmm0, %xmm0
; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}

; Per i8 lane: return %y where bit 0 of %x is clear ((%x & 1) == 0), else %z.
; The AVX1 and AVX2 runs expect a vpsllw $7 feeding vpblendvb directly, with
; no and/compare. The AVX512F, AVX512VL and XOP runs expect the and to be
; kept, followed by a compare against zero (vpcmpeqb, or vpcomneqb on XOP) and
; then vpblendvb (AVX512F, XOP) or vpternlogq (AVX512VL).
define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}

; The and-mask is the i64 sign bit (9223372036854775808 = 2^63): per lane,
; return %y where the sign bit of %x is clear, else %z.
; The AVX1 and AVX2 runs expect a lone vblendvpd keyed directly on %x. The
; AVX512F and AVX512VL runs expect vptestnmq against the sign-bit constant
; into %k1, then a masked vpblendmq. The XOP run expects the and to be kept
; (vpand), followed by vpcomneqq against zero and vblendvpd.
define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqq %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
  %c = icmp eq <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
  ret <2 x i64> %r
}

; The and-mask is the i32 sign bit (2147483648 = 2^31): per lane, return %y
; where the sign bit of %x is clear, else %z.
; The AVX1, AVX2 and XOP runs expect a lone ymm vblendvps keyed directly on
; %x. The AVX512F and AVX512VL runs expect vptestnmd against a broadcast
; constant into %k1, then a masked vpblendmd.
define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %c = icmp eq <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
  ret <8 x i32> %r
}

; The and-mask is the i16 sign bit (32768 = 2^15): per lane, return %y where
; the sign bit of %x is clear, else %z.
; The AVX1 and AVX2 runs expect a vpsraw $15 feeding vpblendvb, with no
; and/compare. The AVX512F, AVX512VL and XOP runs expect the and to be kept,
; followed by a compare against zero (vpcmpeqw, or vpcomneqw on XOP) and then
; vpblendvb (AVX512F, XOP) or vpternlogq (AVX512VL).
define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqw %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
  %c = icmp eq <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
  ret <8 x i16> %r
}

; The and-mask is the i8 sign bit (128 = 2^7): per lane, return %y where the
; sign bit of %x is clear, else %z.
; The AVX2 run expects a lone ymm vpblendvb keyed directly on %x. The AVX1 and
; XOP runs expect a signed compare against zero (vpcmpgtb) on each 128-bit
; half, then a select via and/andn/or (AVX1) or vpcmov (XOP). The AVX512F and
; AVX512VL runs expect the and to be kept, followed by vpcmpeqb against zero
; and vpblendvb (AVX512F) or vpternlogq (AVX512VL).
define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
; XOP-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
  %c = icmp eq <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
  ret <32 x i8> %r
}

; The and-mask is a single bit that is neither the lowest nor the sign bit
; (2, i.e. bit 1): per i64 lane, return %y where that bit of %x is clear,
; else %z.
; The AVX1 and AVX2 runs expect the tested bit to be shifted into the sign
; position (vpsllq $62) and fed straight to vblendvpd. The AVX512F and
; AVX512VL runs expect vptestnmq against a broadcast constant into %k1, then a
; masked vpblendmq. The XOP run expects the same left shift followed by a
; vpshaq on each 128-bit half before the vblendvpd.
define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $62, %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm3, %xmm3
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpsllq $62, %xmm0, %xmm0
; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}

; Single-bit select with a splat mask, <4 x i32>: each lane returns %y when
; bit 16 of %x (mask value 65536) is clear and %z when it is set.
; Expected lowering per the assertions below: the AVX1/AVX2 runs move the
; tested bit to the sign position (vpslld $15) and feed it to vblendvps. The
; AVX512 runs use vptestnmd into a mask register plus vpblendmd (on zmm when VL
; is unavailable, hence the trailing vzeroupper). The XOP run keeps an explicit
; vpand + vpcomneqd against zero ahead of vblendvps.
define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpslld $15, %xmm0, %xmm0
; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

; Single-bit select with a splat mask, <16 x i16>: each lane returns %y when
; bit 10 of %x (mask value 1024) is clear and %z when it is set.
; Expected lowering per the assertions below: the AVX1, AVX2 and XOP runs shift
; the tested bit to the top of the lane (vpsllw $5) and then smear it across
; the whole 16-bit element (vpsraw $15) before selecting -- presumably because
; the select instructions used here (and/andn/or, vpblendvb, vpcmov) need every
; bit or byte of the lane to agree. The AVX512 runs keep the plain
; and + vpcmpeqw-with-zero form, followed by vpblendvb or vpternlogq $202.
define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_splat_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm3
; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_splat_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm3
; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpsllw $5, %xmm0, %xmm0
; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}

; Single-bit select with a splat mask, <16 x i8>: each lane returns %y when
; bit 2 of %x (mask value 4) is clear and %z when it is set.
; Expected lowering per the assertions below: the AVX1/AVX2 runs use a 16-bit
; shift (vpsllw $5), which places bit 2 of every byte in that byte's sign
; position, and pass the result straight to vpblendvb. The AVX512 runs keep
; and + vpcmpeqb-with-zero followed by vpblendvb or vpternlogq $202, and the
; XOP run keeps vpand + vpcomneqb ahead of vpblendvb.
define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_splat_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpsllw $5, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_splat_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}

; Single-bit select with a different mask per lane, <2 x i64>: lane 0 tests
; bit 0 (mask 1) and lane 1 tests bit 2 (mask 4); a lane returns %y when its
; tested bit is clear and %z when it is set.
; Expected lowering per the assertions below: the AVX2 run uses a per-lane
; variable shift (vpsllvq with a constant-pool operand) and the XOP run uses
; vpshlq, each followed directly by vblendvpd. The AVX1 run keeps the explicit
; vpand + vpcmpeqq-with-zero and blends with the operands swapped. The AVX512
; runs use vptestnmq into a mask register plus vpblendmq.
define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpmovsxbq {{.*#+}} xmm3 = [1,4]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v2i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v2i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <2 x i64> %x, <i64 1, i64 4>
  %c = icmp eq <2 x i64> %a, zeroinitializer
  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
  ret <2 x i64> %r
}

; Single-bit select with a different mask per lane, <4 x i32>: the lanes test
; bits 16, 9, 1 and 0 respectively (masks 65536, 512, 2, 1); a lane returns %y
; when its tested bit is clear and %z when it is set.
; Expected lowering per the assertions below: every non-AVX512 run moves the
; tested bit with one instruction taking a constant-pool operand and then
; issues vblendvps -- vpmulld on AVX1 (presumably a multiply by per-lane powers
; of two standing in for a variable shift; the constant itself is hidden behind
; the LCPI pattern), vpsllvd on AVX2 and vpshld on XOP. The AVX512 runs use
; vptestnmd into a mask register plus vpblendmd.
define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [65536,512,2,1]
; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v4i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
  ret <4 x i32> %r
}

; Single-bit select with a different mask per lane, <8 x i16>: every lane ANDs
; %x with its own power-of-two constant and returns %y when the result is zero,
; %z otherwise.
; Expected lowering per the assertions below: the AVX1/AVX2 and AVX512 runs all
; keep the explicit vpand + vpcmpeqw-with-zero, then select with vpblendvb
; (vpternlogq $202 when VL is available). Only the XOP run replaces the AND
; with a per-lane shift (vpshlw), followed by a signed less-than-zero compare
; (vpcomltw) and vpblendvb.
define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
; AVX12-LABEL: blend_mask_cond_v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v8i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltw %xmm3, %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
  %c = icmp eq <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
  ret <8 x i16> %r
}

; Single-bit select with a different mask per lane, <16 x i8>: every lane ANDs
; %x with its own power-of-two constant (the first eight lanes cover bits 0..7)
; and returns %y when the result is zero, %z otherwise.
; Expected lowering per the assertions below: the AVX1/AVX2 and AVX512 runs
; keep the explicit vpand + vpcmpeqb-with-zero, then select with vpblendvb
; (vpternlogq $202 when VL is available). The XOP run uses a per-byte shift
; (vpshlb) and feeds the result directly to vpblendvb with no compare.
define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
; AVX12-LABEL: blend_mask_cond_v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX12-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX12-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX12-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT:    vpternlogq $202, %xmm2, %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v16i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
; XOP-NEXT:    retq
  %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
  %c = icmp eq <16 x i8> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
  ret <16 x i8> %r
}

; Single-bit select with a different mask per lane, <4 x i64>: the lanes test
; bits 1, 2, 15 and 0 respectively (masks 2, 4, 32768, 1); a lane returns %y
; when its tested bit is clear and %z when it is set.
; Expected lowering per the assertions below: the AVX2 run uses one 256-bit
; variable shift (vpsllvq) and the XOP run uses vpshlq on each 128-bit half,
; both followed directly by vblendvpd. The AVX1 run keeps the explicit AND and
; compares each 128-bit half with zero (vpcmpeqq) before a swapped-operand
; vblendvpd. The AVX512 runs use vptestnmq into a mask register plus vpblendmq.
define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
; AVX1-LABEL: blend_mask_cond_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovzxwq {{.*#+}} ymm3 = [2,4,32768,1]
; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1>
  %c = icmp eq <4 x i64> %a, zeroinitializer
  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
  ret <4 x i64> %r
}

; Single-bit select with a different mask per lane, <8 x i32>: every lane ANDs
; %x with its own power-of-two constant and returns %y when the result is zero,
; %z otherwise.
; Expected lowering per the assertions below: the non-AVX512 runs all move the
; tested bit with a constant-pool operand and then issue vblendvps with no
; compare -- vpmulld per 128-bit half on AVX1 (presumably multiplication by
; powers of two acting as a variable shift; the constants are hidden behind the
; LCPI pattern), one vpsllvd on AVX2, and vpshld per 128-bit half on XOP. The
; AVX512 runs use vptestnmd into a mask register plus vpblendmd.
define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; AVX1-LABEL: blend_mask_cond_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT:    vpmovsxwd {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096]
; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; XOP-NEXT:    retq
  %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096>
  %c = icmp eq <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
  ret <8 x i32> %r
}

; Single-bit select with a different mask per lane, <16 x i16>: every lane ANDs
; %x with its own power-of-two constant and returns %y when the result is zero,
; %z otherwise.
; Expected lowering per the assertions below: all AVX and AVX512 runs keep the
; explicit AND + vpcmpeqw-with-zero. The select is then and/andn/or on AVX1
; (after comparing each 128-bit half), vpblendvb on AVX2 and AVX512F, and
; vpternlogq $202 with VL. The XOP run instead shifts each 128-bit half with
; vpshlw, compares less-than-zero with vpcomltw, and selects with vpcmov.
define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
; AVX1-LABEL: blend_mask_cond_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltw %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
  %c = icmp eq <16 x i16> %a, zeroinitializer
  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
  ret <16 x i16> %r
}

; Single-bit select with a different mask per lane, <32 x i8>: every lane ANDs
; %x with its own power-of-two constant and returns %y when the result is zero,
; %z otherwise.
; Expected lowering per the assertions below: all AVX and AVX512 runs keep the
; explicit AND + vpcmpeqb-with-zero. The select is then and/andn/or on AVX1
; (after comparing each 128-bit half), vpblendvb on AVX2 and AVX512F, and
; vpternlogq $202 with VL. The XOP run instead shifts each 128-bit half with
; vpshlb, compares less-than-zero with vpcomltb, and selects with vpcmov.
define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
; AVX1-LABEL: blend_mask_cond_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_mask_cond_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: blend_mask_cond_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512F-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: blend_mask_cond_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT:    vpternlogq $202, %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: blend_mask_cond_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vpcomltb %xmm4, %xmm0, %xmm0
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
; XOP-NEXT:    retq
  %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16>
  %c = icmp eq <32 x i8> %a, zeroinitializer
  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
  ret <32 x i8> %r
}

; Load / compare / select / store, <4 x i64>: loads %v0 from %a0 and %v1 from
; %a1, keeps the %v0 lane where it is unsigned-greater-than 7 and the %v1 lane
; otherwise, and stores the result to %a2.
; Expected lowering per the assertions below: the AVX1 and AVX2 runs implement
; the unsigned compare by XORing with the sign-bit constant
; (9223372036854775808) and doing a signed vpcmpgtq against 7 with the sign bit
; flipped (9223372036854775815), then vblendvpd. The AVX512 runs use the
; unsigned mask compare vpcmpnleuq with a masked move, and the XOP run uses
; vpcomltuq against a splat of 7 on each 128-bit half.
define void @store_blend_load_v4i64(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovapd (%rsi), %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpxor 16(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775815,9223372036854775815]
; AVX1-NEXT:    # xmm3 = mem[0,0]
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor (%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    vmovapd %ymm0, (%rdx)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovapd (%rsi), %ymm1
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775815,9223372036854775815,9223372036854775815,9223372036854775815]
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovapd %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v4i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v4i64:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovapd (%rsi), %ymm0
; XOP-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [7,7]
; XOP-NEXT:    vpcomltuq 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltuq (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vblendvpd %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vmovapd %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <4 x i64>, ptr %a0
  %v1 = load <4 x i64>, ptr %a1
  %cmp = icmp ugt <4 x i64> %v0, <i64 7, i64 7, i64 7, i64 7>
  %res = select <4 x i1> %cmp, <4 x i64> %v0, <4 x i64> %v1
  store <4 x i64> %res, ptr %a2
  ret void
}

; Load / compare / select / store, <8 x i32>: loads %v0 from %a0 and %v1 from
; %a1, keeps the %v0 lane where it is unsigned-greater-than 7 and the %v1 lane
; otherwise, and stores the result to %a2.
; Expected lowering per the assertions below: the AVX1 and AVX2 runs express
; "x >u 7" as "x == umax(x, 8)" (vpmaxud against a splat of 8, then vpcmpeqd)
; and select with vblendvps. The AVX512 runs use the unsigned mask compare
; vpcmpnleud with a masked move, and the XOP run uses vpcomltud against a splat
; of 7 on each 128-bit half.
define void @store_blend_load_v8i32(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps (%rsi), %ymm0
; AVX1-NEXT:    vmovdqa (%rdi), %xmm1
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm2
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [8,8,8,8]
; AVX1-NEXT:    vpmaxud %xmm3, %xmm2, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vblendvps %ymm1, (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, (%rdx)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovaps (%rsi), %ymm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8]
; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovaps %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v8i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512VL-NEXT:    vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v8i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovaps (%rsi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; XOP-NEXT:    vpcomltud 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltud (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vblendvps %ymm1, (%rdi), %ymm0, %ymm0
; XOP-NEXT:    vmovaps %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <8 x i32>, ptr %a0
  %v1 = load <8 x i32>, ptr %a1
  %cmp = icmp ugt <8 x i32> %v0, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %res = select <8 x i1> %cmp, <8 x i32> %v0, <8 x i32> %v1
  store <8 x i32> %res, ptr %a2
  ret void
}

; Load / compare / select / store, <16 x i16>: loads %v0 from %a0 and %v1 from
; %a1, keeps the %v0 lane where it is unsigned-greater-than 7 and the %v1 lane
; otherwise, and stores the result to %a2.
; Expected lowering per the assertions below: every AVX and AVX512 run
; expresses "x >u 7" as "x == umax(x, 8)" (vpmaxuw then vpcmpeqw). The AVX1 run
; works entirely on 128-bit halves with two vpblendvb and two stores, and emits
; no vzeroupper. The AVX2 and AVX512F runs use one 256-bit vpblendvb, while the
; VL run folds the %a1 load into vpternlogq $202. The XOP run compares with
; vpcomltuw against a splat of 7 and selects with vpcmov, folding the %a1 load.
define void @store_blend_load_v16i16(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8]
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa (%rsi), %xmm4
; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm5
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rdx)
; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdx)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpternlogq $202, (%rsi), %ymm0, %ymm1
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v16i16:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
; XOP-NEXT:    vpcomltuw 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltuw (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; XOP-NEXT:    vmovdqa %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <16 x i16>, ptr %a0
  %v1 = load <16 x i16>, ptr %a1
  %cmp = icmp ugt <16 x i16> %v0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %res = select <16 x i1> %cmp, <16 x i16> %v0, <16 x i16> %v1
  store <16 x i16> %res, ptr %a2
  ret void
}

; Load / compare / select / store, <32 x i8>: loads %v0 from %a0 and %v1 from
; %a1, keeps the %v0 lane where it is unsigned-greater-than 7 and the %v1 lane
; otherwise, and stores the result to %a2.
; Expected lowering per the assertions below: every AVX and AVX512 run
; expresses "x >u 7" as "x == umax(x, 8)" (vpmaxub then vpcmpeqb). The AVX1 run
; works entirely on 128-bit halves with two vpblendvb and two stores, and emits
; no vzeroupper. The AVX2 and AVX512F runs use one 256-bit vpblendvb, while the
; VL run folds the %a1 load into vpternlogq $202. The XOP run compares with
; vpcomltub against a splat of 7 and selects with vpcmov, folding the %a1 load.
define void @store_blend_load_v32i8(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: store_blend_load_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa (%rsi), %xmm4
; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm5
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm5, %xmm1
; AVX1-NEXT:    vpblendvb %xmm3, %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vmovdqa %xmm0, (%rdx)
; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdx)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_blend_load_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
; AVX2-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_blend_load_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm1
; AVX512F-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_blend_load_v32i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT:    vpmaxub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-NEXT:    vpternlogq $202, (%rsi), %ymm0, %ymm1
; AVX512VL-NEXT:    vmovdqa %ymm1, (%rdx)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: store_blend_load_v32i8:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqa (%rdi), %ymm0
; XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; XOP-NEXT:    vpcomltub 16(%rdi), %xmm1, %xmm2
; XOP-NEXT:    vpcomltub (%rdi), %xmm1, %xmm1
; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; XOP-NEXT:    vmovdqa %ymm0, (%rdx)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %v0 = load <32 x i8>, ptr %a0
  %v1 = load <32 x i8>, ptr %a1
  %cmp = icmp ugt <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %res = select <32 x i1> %cmp, <32 x i8> %v0, <32 x i8> %v1
  store <32 x i8> %res, ptr %a2
  ret void
}

; Test named for PR46531 (presumably the LLVM bug report of that number).
; Per lane of <4 x i32>, with %a loaded from %y and %b loaded from %z, the
; function stores to %x:
;   (%b & 1) == 0 ? (%b | %a) : (%b ^ %a)
; The select condition depends only on bit 0 of %b. In the expected AVX1/AVX2
; output that bit is shifted into the sign bit (vpslld $31) and used directly
; as the vblendvps mask, with the blend operands swapped to account for the
; inverted condition, so no explicit compare is emitted for those targets.
define void @PR46531(ptr %x, ptr %y, ptr %z) {
; AVX12-LABEL: PR46531:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovdqu (%rsi), %xmm0
; AVX12-NEXT:    vmovdqu (%rdx), %xmm1
; AVX12-NEXT:    vpor %xmm0, %xmm1, %xmm2
; AVX12-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX12-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; AVX12-NEXT:    vmovups %xmm0, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: PR46531:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqu (%rsi), %xmm0
; AVX512F-NEXT:    vmovdqu (%rdx), %xmm1
; AVX512F-NEXT:    vpor %xmm0, %xmm1, %xmm2
; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1}
; AVX512F-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: PR46531:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vmovdqu (%rsi), %xmm0
; AVX512VL-NEXT:    vmovdqu (%rdx), %xmm1
; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
; AVX512VL-NEXT:    vpxor %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT:    vpord %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT:    vmovdqu %xmm2, (%rdi)
; AVX512VL-NEXT:    retq
;
; XOP-LABEL: PR46531:
; XOP:       # %bb.0:
; XOP-NEXT:    vmovdqu (%rsi), %xmm0
; XOP-NEXT:    vmovdqu (%rdx), %xmm1
; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm2
; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT:    vpcomneqd %xmm4, %xmm3, %xmm3
; XOP-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vblendvps %xmm3, %xmm0, %xmm2, %xmm0
; XOP-NEXT:    vmovups %xmm0, (%rdi)
; XOP-NEXT:    retq
  ; Both vectors are loaded with only 4-byte alignment; the expected output
  ; above uses unaligned loads (vmovdqu) for them.
  %a = load <4 x i32>, ptr %y, align 4
  %b = load <4 x i32>, ptr %z, align 4
  %or = or <4 x i32> %b, %a
  ; Isolate bit 0 of each lane of %b; the compare below is true where it is clear.
  %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %and, zeroinitializer
  %xor = xor <4 x i32> %b, %a
  ; Bit 0 clear -> take the OR result, bit 0 set -> take the XOR result.
  %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor
  store <4 x i32> %sel, ptr %x, align 4
  ret void
}

; Attribute group #0 sets "no-nans-fp-math"="true" on every function declared
; with #0. NOTE(review): no user of #0 is visible in this part of the file;
; presumably earlier floating-point select tests reference it - confirm.
attributes #0 = { "no-nans-fp-math"="true" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}