llvm/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; ASHR - Immediate
;

define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; LSHR - Immediate
;

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; SHL - Immediate
;

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; ASHR - Constant Vector
;

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %1
}

;
; LSHR - Constant Vector
;

define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15_splat(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15_splat(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %1
}

;
; SHL - Constant Vector
;

define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15_splat(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15_splat(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_15_512_splat(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_15_splat(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %1
}

;
; ASHR - Constant Per-Element Vector
;

define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
  ret <16 x i32> %1
}

define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <8 x i32> %1
}

define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <16 x i32> %1
}

define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
  ret <16 x i32> %2
}

define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 0, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
  ret <4 x i64> %1
}

define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 undef>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
  ret <4 x i64> %1
}

define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 undef, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
  %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
  ret <4 x i64> %2
}

define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
  ret <8 x i64> %2
}

define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
  %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
  %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
  ret <16 x i16> %2
}

define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

;
; LSHR - Constant Per-Element Vector
;

define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 0, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
  ret <4 x i64> %1
}

; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input.

define <2 x i64> @avx2_psrlv_q_128_poison(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_poison(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <4 x i64> @avx2_psrlv_q_256_poison(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_poison(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
  ret <4 x i64> %2
}

define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrlv_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrlv_d_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrlv_d_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
  ret <16 x i32> %2
}

define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrlv_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrlv_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrlv_q_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrlv_q_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
  ret <8 x i64> %2
}

define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_128_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
  %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_256_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
  %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
  ret <16 x i16> %2
}

define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrlv_w_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

;
; SHL - Constant Per-Element Vector
;

define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_128_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_256_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_128_allbig(
; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
  ret <4 x i32> %1
}

define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_256_allbig(
; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <8 x i32> %1
}

define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psllv_d_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 0, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
  ret <4 x i64> %1
}

define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_128_allbig(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
  ret <2 x i64> %1
}

define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_256_allbig(
; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
  ret <4 x i64> %1
}

; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input.

define <2 x i64> @avx2_psllv_q_128_poison(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_128_poison(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
  %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <4 x i64> @avx2_psllv_q_256_poison(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psllv_q_256_poison(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
  ret <4 x i64> %2
}

define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psllv_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psllv_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psllv_d_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psllv_d_512_allbig(
; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psllv_d_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
  ret <16 x i32> %2
}

define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psllv_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psllv_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psllv_q_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psllv_q_512_allbig(
; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psllv_q_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
  ret <8 x i64> %2
}

define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_128_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_128_allbig(
; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
;
  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
  ret <8 x i16> %1
}

define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
  %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_256_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_256_allbig(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
  ret <16 x i16> %1
}

define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
  %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
  ret <16 x i16> %2
}

define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_512_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_512_allbig(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psllv_w_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

;
; Vector Masked Shift Amounts
;

define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_w_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psra_d_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
;
  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psra_q_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
;
  %1 = and <2 x i64> %a, <i64 63, i64 undef>
  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
  ret <8 x i64> %2
}

define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psrl_d_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
;
  %1 = and <2 x i64> %a, <i64 63, i64 undef>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx512_psrl_w_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
;
  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
  ret <32 x i16> %2
}

define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %1 = and <2 x i64> %a, <i64 63, i64 undef>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

; The shift amount is in range (masked with 31 and high 32-bits are zero),
; so convert to standard IR - https://llvm.org/PR50123

define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast(
; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i64 1
; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %b = bitcast <2 x i64> %a to <4 x i32>
  %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
  %i = insertelement <4 x i32> %m, i32 0, i32 1
  %shamt = bitcast <4 x i32> %i to <2 x i64>
  %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
  ret <2 x i64> %r
}

; TODO: This could be recognized as an over-shift.

define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift(
; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[M]], i32 1, i64 1
; CHECK-NEXT:    [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]])
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %b = bitcast <2 x i64> %a to <4 x i32>
  %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
  %i = insertelement <4 x i32> %m, i32 1, i32 1
  %shamt = bitcast <4 x i32> %i to <2 x i64>
  %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
  ret <2 x i64> %r
}

define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <16 x i32> @avx512_psll_d_512_masked(<16 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx512_psll_d_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
;
  %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
  ret <16 x i32> %2
}

define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrai_w_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %1 = and i32 %a, 15
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 %1)
  ret <8 x i16> %2
}

define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrai_d_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = and i32 %a, 31
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 %1)
  ret <8 x i32> %2
}

define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrai_q_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
;
  %1 = and i32 %a, 63
  %2 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 %1)
  ret <8 x i64> %2
}

define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
; CHECK-LABEL: @sse2_psrli_d_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = and i32 %a, 31
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 %1)
  ret <4 x i32> %2
}

define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
; CHECK-LABEL: @avx2_psrli_q_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
;
  %1 = and i32 %a, 63
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx512_psrli_w_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
;
  %1 = and i32 %a, 15
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 %1)
  ret <32 x i16> %2
}

define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
; CHECK-LABEL: @sse2_pslli_q_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 63
; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %1 = and i32 %a, 63
  %2 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
; CHECK-LABEL: @avx2_pslli_w_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %1 = and i32 %a, 15
  %2 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 %1)
  ret <16 x i16> %2
}

define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
; CHECK-LABEL: @avx512_pslli_d_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 31
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = and i32 %a, 31
  %2 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 %1)
  ret <16 x i32> %2
}

define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrav_d_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = and <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 15, i32 31>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %1 = and <4 x i32> %a, <i32 undef, i32 undef, i32 15, i32 31>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2)
  ret <4 x i32> %3
}

define <8 x i32> @avx2_psrav_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
; CHECK-LABEL: @avx2_psrav_d_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

define <32 x i16> @avx512_psrav_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
; CHECK-LABEL: @avx512_psrav_w_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <32 x i16> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

define <2 x i64> @avx2_psrlv_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrlv_q_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 32, i64 63>
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = and <2 x i64> %a, <i64 32, i64 63>
  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <8 x i32> @avx2_psrlv_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
; CHECK-LABEL: @avx2_psrlv_d_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i32> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i64> @avx512_psrlv_q_512_masked(<8 x i64> %v, <8 x i64> %a) {
; CHECK-LABEL: @avx512_psrlv_q_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i64> [[A:%.*]], <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i64> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
;
  %1 = and <8 x i64> %a, <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
  ret <8 x i64> %2
}

define <4 x i32> @avx2_psllv_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psllv_d_128_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 16, i32 31>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = and <4 x i32> %a, <i32 0, i32 15, i32 16, i32 31>
  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i64> @avx2_psllv_q_256_masked(<4 x i64> %v, <4 x i64> %a) {
; CHECK-LABEL: @avx2_psllv_q_256_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i64> [[A:%.*]], <i64 0, i64 16, i64 32, i64 63>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
;
  %1 = and <4 x i64> %a, <i64 0, i64 16, i64 32, i64 63>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psllv_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
; CHECK-LABEL: @avx512_psllv_w_512_masked(
; CHECK-NEXT:    [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <32 x i16> [[V:%.*]], [[TMP1]]
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

;
; Vector Demanded Bits
;

define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psra_w_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
  ret <8 x i16> %3
}

define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psra_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psra_d_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
  ret <4 x i32> %3
}

define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psra_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psra_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psra_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psra_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx512_psra_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
  ret <32 x i16> %2
}

define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx512_psra_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
  ret <16 x i32> %2
}

define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psra_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
  ret <8 x i64> %2
}

define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psrl_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psrl_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psrl_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx2_psrl_w_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
  ret <16 x i16> %3
}

define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrl_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_d_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
  ret <8 x i32> %3
}

define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx512_psrl_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
  ret <32 x i16> %2
}

define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
  ret <32 x i16> %3
}

define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx512_psrl_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
  ret <16 x i32> %2
}

define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
  ret <16 x i32> %3
}

define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psrl_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
  ret <8 x i64> %2
}

define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}

define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}

define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}

define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}

define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}

define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx512_psll_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
  ret <32 x i16> %2
}

define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx512_psll_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
  ret <16 x i32> %2
}

define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx512_psll_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
  ret <8 x i64> %2
}

;
; Constant Folding
;

define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
; CHECK-LABEL: @test_sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> [[A:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
  ret <8 x i16> %3
}

define <8 x i16> @test_sse2_psra_w_8() {
; CHECK-LABEL: @test_sse2_psra_w_8(
; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
  ret <8 x i16> %4
}

define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
; CHECK-LABEL: @test_sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
  ret <4 x i32> %3
}

define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8(
; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}

define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> [[A:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}

define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8(
; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
  ret <16 x i16> %4
}

define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
; CHECK-LABEL: @test_avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> [[A:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}

define <8 x i32> @test_avx2_psra_d_8() {
; CHECK-LABEL: @test_avx2_psra_d_8(
; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
  ret <8 x i32> %4
}

define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
; CHECK-LABEL: @test_avx512_psra_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[A:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
  ret <32 x i16> %3
}

define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
; CHECK-LABEL: @test_avx512_psra_w_512_8(
; CHECK-NEXT:    ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
  %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
  ret <32 x i16> %4
}

define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
; CHECK-LABEL: @test_avx512_psra_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[A:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0)
  ret <16 x i32> %3
}

define <16 x i32> @test_avx512_psra_d_512_8() {
; CHECK-LABEL: @test_avx512_psra_d_512_8(
; CHECK-NEXT:    ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3)
  %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2)
  ret <16 x i32> %4
}

;
; Old Tests
;

define <2 x i64> @test_sse2_1() {
; CHECK-LABEL: @test_sse2_1(
; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_1() {
; CHECK-LABEL: @test_avx2_1(
; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

define <2 x i64> @test_sse2_0() {
; CHECK-LABEL: @test_sse2_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_0() {
; CHECK-LABEL: @test_avx2_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
define <2 x i64> @test_sse2_psrl_1() {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_psrl_1() {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

define <2 x i64> @test_sse2_psrl_0() {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

define <4 x i64> @test_avx2_psrl_0() {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1

declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1

declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1

declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1

declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1

attributes #1 = { nounwind readnone }