; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-unknown"
; Codegen regression test for `frem` on a <4 x half> vector, compiled with the
; llc invocation and 32-bit x86 Linux triple given at the top of this file.
;
; The expected assembly below pins the following shape:
;   * a 140-byte stack frame (subl/addl $140, %esp);
;   * the two vector operands are read from %xmm0 and %xmm1 and split into
;     eight 16-bit lanes (psrld/psrlq/shufps + pextrw/pinsrw);
;   * four rounds of: two calls to __extendhfsf2, one call to fmodf, and one
;     call to __truncsfhf2 (8 + 4 + 4 library calls in total);
;   * the four results are merged back into %xmm0 with punpcklwd/unpcklps
;     before returning.
;
; The assertion lines inside the function are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of the
; file); regenerate them with that script instead of editing them by hand.
define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
; CHECK-LABEL: doTheTestMod:
; CHECK: # %bb.0: # %Entry
; CHECK-NEXT: subl $140, %esp
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, %xmm6
; CHECK-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm0, %xmm3
; CHECK-NEXT: psrlq $48, %xmm3
; CHECK-NEXT: movaps %xmm0, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: movaps %xmm6, %xmm7
; CHECK-NEXT: movaps %xmm6, %xmm4
; CHECK-NEXT: psrlq $48, %xmm4
; CHECK-NEXT: movaps %xmm6, %xmm5
; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
; CHECK-NEXT: psrld $16, %xmm6
; CHECK-NEXT: pextrw $0, %xmm7, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm6, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm5, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm4, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm3, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm2, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm1, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movaps %xmm0, %xmm3
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: addl $140, %esp
; CHECK-NEXT: retl
; The block label below is echoed in the expected "# %bb.0" assertion line
; above, so renaming it requires regenerating the assertions.
Entry:
; Two 8-byte-aligned stack slots, one per vector operand.
%x = alloca <4 x half>, align 8
%y = alloca <4 x half>, align 8
; Store both arguments to their slots, then load them straight back.
store <4 x half> %0, ptr %x, align 8
store <4 x half> %1, ptr %y, align 8
%2 = load <4 x half>, ptr %x, align 8
%3 = load <4 x half>, ptr %y, align 8
; Operation under test: vector floating-point remainder of %2 by %3.
%4 = frem <4 x half> %2, %3
ret <4 x half> %4
}