; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
;
; UNDEF Elts
;
; 128-bit variant, undef as the second operand: the call is expected to be
; replaced by an all-zero vector.
define <8 x i16> @undef_pmulh_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_128(
; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> undef)
  ret <8 x i16> %res
}
; 128-bit variant, undef as the first operand (commuted form): the call is
; expected to be replaced by an all-zero vector.
define <8 x i16> @undef_pmulh_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_128_commute(
; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> undef, <8 x i16> %a0)
  ret <8 x i16> %res
}
; 256-bit variant, undef as the second operand: the call is expected to be
; replaced by an all-zero vector.
define <16 x i16> @undef_pmulh_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_256(
; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> undef)
  ret <16 x i16> %res
}
; 256-bit variant, undef as the first operand (commuted form): the call is
; expected to be replaced by an all-zero vector.
define <16 x i16> @undef_pmulh_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_256_commute(
; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> undef, <16 x i16> %a0)
  ret <16 x i16> %res
}
; 512-bit variant, undef as the second operand: the call is expected to be
; replaced by an all-zero vector.
define <32 x i16> @undef_pmulh_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_512(
; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> undef)
  ret <32 x i16> %res
}
; 512-bit variant, undef as the first operand (commuted form): the call is
; expected to be replaced by an all-zero vector.
define <32 x i16> @undef_pmulh_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmulh_512_commute(
; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> undef, <32 x i16> %a0)
  ret <32 x i16> %res
}
;
; Zero Elts
;
; 128-bit variant, all-zero second operand: the call is expected to fold to
; an all-zero vector.
define <8 x i16> @zero_pmulh_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_128(
; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <8 x i16> %res
}
; 128-bit variant, all-zero first operand (commuted form): the call is
; expected to fold to an all-zero vector.
define <8 x i16> @zero_pmulh_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_128_commute(
; CHECK-NEXT: ret <8 x i16> zeroinitializer
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <8 x i16> %res
}
; 256-bit variant, all-zero second operand: the call is expected to fold to
; an all-zero vector.
define <16 x i16> @zero_pmulh_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_256(
; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
; 256-bit variant, all-zero first operand (commuted form): the call is
; expected to fold to an all-zero vector.
define <16 x i16> @zero_pmulh_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_256_commute(
; CHECK-NEXT: ret <16 x i16> zeroinitializer
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <16 x i16> %res
}
; 512-bit variant, all-zero second operand: the call is expected to fold to
; an all-zero vector.
define <32 x i16> @zero_pmulh_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_512(
; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <32 x i16> %res
}
; 512-bit variant, all-zero first operand (commuted form): the call is
; expected to fold to an all-zero vector.
define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmulh_512_commute(
; CHECK-NEXT: ret <32 x i16> zeroinitializer
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <32 x i16> %res
}
;
; Multiply by One
;
; 128-bit variant, second operand is a vector of ones: the expected output
; keeps the intrinsic call as written (no simplification).
define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %res
}
; 128-bit variant, first operand is a vector of ones (commuted form): the
; expected output keeps the intrinsic call as written, operand order included.
define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_128_commute(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
;
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
  ret <8 x i16> %res
}
; 256-bit variant, second operand is a vector of ones: the expected output
; keeps the intrinsic call as written (no simplification).
define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <16 x i16> %res
}
; 256-bit variant, first operand is a vector of ones (commuted form): the
; expected output keeps the intrinsic call as written, operand order included.
define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_256_commute(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
  ret <16 x i16> %res
}
; 512-bit variant, second operand is a vector of ones: the expected output
; keeps the intrinsic call as written (no simplification).
define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <32 x i16> %res
}
; 512-bit variant, first operand is a vector of ones (commuted form): the
; expected output keeps the intrinsic call as written, operand order included.
define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @one_pmulh_512_commute(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
;
  %res = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
  ret <32 x i16> %res
}
;
; Constant Folding
;
; 128-bit variant with two constant operands (small values mixed with values
; near the i16 limits, including -32768): the call is expected to be folded
; away entirely, leaving only the constant result vector.
define <8 x i16> @fold_pmulh_128() {
; CHECK-LABEL: @fold_pmulh_128(
; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 -3, i16 -4, i16 0, i16 0, i16 -7, i16 -8>
;
  %folded = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -32768, i16 32765, i16 -9, i16 -11, i16 -32763, i16 32761>)
  ret <8 x i16> %folded
}
; 256-bit variant with two constant operands (small values mixed with values
; near the i16 limits, including -32768): the call is expected to be folded
; away entirely, leaving only the constant result vector.
define <16 x i16> @fold_pmulh_256() {
; CHECK-LABEL: @fold_pmulh_256(
; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 -2, i16 3, i16 0, i16 0, i16 -6, i16 7, i16 0, i16 0, i16 10, i16 11, i16 0, i16 0, i16 14, i16 -15>
;
  %folded = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>, <16 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>)
  ret <16 x i16> %folded
}
; 512-bit variant with two constant operands: the call is expected to be
; folded away entirely, leaving only the constant result vector.
; The upper 16 lanes of each operand repeat the lower 16 lanes of the other
; operand, and the expected result is the same 16-lane pattern twice.
define <32 x i16> @fold_pmulh_512() {
; CHECK-LABEL: @fold_pmulh_512(
; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 -2, i16 3, i16 0, i16 0, i16 -6, i16 7, i16 0, i16 0, i16 10, i16 11, i16 0, i16 0, i16 14, i16 -15, i16 0, i16 0, i16 -2, i16 3, i16 0, i16 0, i16 -6, i16 7, i16 0, i16 0, i16 10, i16 11, i16 0, i16 0, i16 14, i16 -15>
;
  %folded = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15, i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>, <32 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756, i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>)
  ret <32 x i16> %folded
}
;
; Demanded Elts
;
; 128-bit demanded-elements test. The result is splatted from lane 0 (all-zero
; shuffle mask), so only lane 0 of the multiply is used. Both operand shuffles
; leave lane 0 in place (their first mask index is 0); the expected output
; drops them and feeds %a0 / %a1 straight into the call.
define <8 x i16> @elts_pmulh_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmulh_128(
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: ret <8 x i16> [[TMP2]]
;
  ; Permute the upper lanes of each input; lane 0 is untouched.
  %lhs = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %rhs = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %mul = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  ; Broadcast lane 0 of the product to every lane.
  %splat = shufflevector <8 x i16> %mul, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
; 256-bit demanded-elements test. The result is splatted from lane 0 (all-zero
; shuffle mask), so only lane 0 of the multiply is used. Both operand shuffles
; leave lane 0 in place (their first mask index is 0); the expected output
; drops them and feeds %a0 / %a1 straight into the call.
define <16 x i16> @elts_pmulh_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmulh_256(
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: ret <16 x i16> [[TMP2]]
;
  ; Permute lanes 2-7 of each input; lane 0 is untouched.
  %lhs = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mul = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %lhs, <16 x i16> %rhs)
  ; Broadcast lane 0 of the product to every lane.
  %splat = shufflevector <16 x i16> %mul, <16 x i16> poison, <16 x i32> zeroinitializer
  ret <16 x i16> %splat
}
; 512-bit demanded-elements test. The result is splatted from lane 0 (all-zero
; shuffle mask), so only lane 0 of the multiply is used. Both operand shuffles
; leave lane 0 in place (their first mask index is 0); the expected output
; drops them and feeds %a0 / %a1 straight into the call.
define <32 x i16> @elts_pmulh_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmulh_512(
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: ret <32 x i16> [[TMP2]]
;
  ; Permute lanes 2-7 of each input; lane 0 is untouched.
  %lhs = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %rhs = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %mul = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %lhs, <32 x i16> %rhs)
  ; Broadcast lane 0 of the product to every lane.
  %splat = shufflevector <32 x i16> %mul, <32 x i16> poison, <32 x i32> zeroinitializer
  ret <32 x i16> %splat
}