llvm/test/CodeGen/X86/freeze-vector.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

define <2 x i64> @freeze_insert_vector_elt(<2 x i64> %a0) {
; CHECK-LABEL: freeze_insert_vector_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %idx0 = insertelement <2 x i64> %a0, i64 0, i64 0
  %freeze0 = freeze <2 x i64> %idx0
  %idx1 = insertelement <2 x i64> %freeze0, i64 0, i64 1
  %freeze1 = freeze <2 x i64> %idx1
  ret <2 x i64> %freeze1
}

define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %y = freeze <8 x i32> %x
  %z = shufflevector <8 x i32> %y, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %z
}

define <2 x i64> @freeze_sign_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_sign_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = sext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <2 x i64> @freeze_zero_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_zero_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %x = zext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = zext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}

define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x i32> %x
  %z = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %z
}

define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
; CHECK-LABEL: freeze_permilps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x float> %x
  %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %z
}

define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  store <8 x i8> %i1, ptr %escape
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  store <4 x i16> %i1, ptr %escape
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}

define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
; X86-LABEL: freeze_extractelement:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovdqa (%edx), %xmm0
; X86-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}
define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_escape:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovdqa (%esi), %xmm0
; X86-NEXT:    vpand (%edx), %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_escape:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  store <16 x i8> %i3, ptr %escape
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}

; It would be a miscompilation to pull freeze out of extractelement here.
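; For illustration only (not part of the checked IR): a transform that rewrote the
; two extracts below as, hypothetically,
;   %e0 = extractelement <16 x i8> %i2, i64 %idx0
;   %f0 = freeze i8 %e0
;   %e1 = extractelement <16 x i8> %i2, i64 %idx1
;   %f1 = freeze i8 %e1
; could freeze the same maybe-poison element to two different values, so the icmp
; would no longer be guaranteed true when %idx0 == %idx1. The single freeze of the
; whole vector must therefore stay above both extracts.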
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movl 32(%ebp), %edx
; X86-NEXT:    movl 12(%ebp), %esi
; X86-NEXT:    movl 8(%ebp), %edi
; X86-NEXT:    vmovaps (%edi), %xmm0
; X86-NEXT:    vandps (%esi), %xmm0, %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esp)
; X86-NEXT:    movzbl (%esp,%ecx), %ecx
; X86-NEXT:    cmpb (%esp,%eax), %cl
; X86-NEXT:    sete (%edx)
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_extra_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    andl $15, %edx
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandps (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -24(%rsp,%rdx), %eax
; X64-NEXT:    cmpb -24(%rsp,%rcx), %al
; X64-NEXT:    sete (%r8)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 %idx0
  %i5 = extractelement <16 x i8> %i3, i64 %idx1
  %i6 = icmp eq i8 %i4, %i5
  store i1 %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 42, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 %i0, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}

define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_frozen_buildvectors:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vmovd %edx, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_frozen_buildvectors:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  %i7 = freeze <4 x i32> %i6
  store <4 x i32> %i7, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst1
  ret void
}

define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT:    # xmm0 = mem[0,0]
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
; X86-NEXT:    vmovdqa %xmm2, (%ecx)
; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i64, ptr %origin0
  %i0 = and i64 %i0.src, 15
  %i1.src = load i64, ptr %origin1
  %i1 = and i64 %i0.src, 15
  %i2 = insertelement <2 x i64> poison, i64 %i0, i64 0
  %i3 = and <2 x i64> %i2, <i64 7, i64 7>
  %i4 = freeze <2 x i64> %i3
  store <2 x i64> %i4, ptr %dst0
  %i5 = insertelement <2 x i64> poison, i64 %i1, i64 1
  %i6 = and <2 x i64> %i5, <i64 7, i64 7>
  store <2 x i64> %i6, ptr %dst1
  ret void
}

define void @freeze_buildvector(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_one_undef_elt(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i7 = insertelement <4 x i32> %i5, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @freeze_buildvector_extrause(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_buildvector_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_extrause:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r9)
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  store <4 x i32> %i7, ptr %escape
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}

define void @pr59677(i32 %x, ptr %out) nounwind {
; X86-LABEL: pr59677:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    vmovd %eax, %xmm1
; X86-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X86-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    calll sinf
; X86-NEXT:    fstps (%esi)
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pr59677:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    callq sinf@PLT
; X64-NEXT:    vmovss %xmm0, (%rbx)
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %i0 = or i32 %x, 1
  %i1 = insertelement <4 x i32> zeroinitializer, i32 %x, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 %i0, i64 1
  %i3 = shl <4 x i32> %i2, <i32 1, i32 1, i32 1, i32 1>
  %i4 = sitofp <4 x i32> %i3 to <4 x float>
  %i5 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %i4)
  %i6 = extractelement <4 x float> %i5, i64 0
  store float %i6, ptr %out, align 4
  ret void
}
declare <4 x float> @llvm.sin.v4f32(<4 x float>)

; Test that we can eliminate freeze by changing the BUILD_VECTOR to a splat
; zero vector.
define void @freeze_buildvector_not_simple_type(ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_not_simple_type:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_not_simple_type:
; X64:       # %bb.0:
; X64-NEXT:    movb $0, 4(%rdi)
; X64-NEXT:    movl $0, (%rdi)
; X64-NEXT:    retq
  %i0 = freeze <5 x i8> <i8 poison, i8 0, i8 0, i8 undef, i8 0>
  store <5 x i8> %i0, ptr %dst
  ret void
}