; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s
;
; SimplifyDemandedVectorEltsForTargetNode Handling
;
define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: demandedelts_vpshab:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshab %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%shuffle = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%shift = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %shuffle, <16 x i8> %a1)
%res = shufflevector <16 x i8> %shift, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %res
}
define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: demandedelts_vpshld:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> zeroinitializer
%shift = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %shuffle)
%result = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %result
}
;
; isBinOp Handling
;
define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: binop_shuffle_vpshaw:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
%shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
%shift = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %shuffle0, <8 x i16> %shuffle1)
%result = shufflevector <8 x i16> %shift, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %result
}
; TODO - canonicalizeShuffleWithBinOps - handle scaled shuffle masks.
define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: binop_shuffle_vpshlq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; CHECK-NEXT: vpshlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%shuffle0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
%shuffle1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%shift = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %shuffle0, <2 x i64> %shuffle1)
%result = shufflevector <2 x i64> %shift, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %result
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone