; llvm/llvm/test/CodeGen/X86/pr63475.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

; @caller passes six null pointers followed by a <7 x i32> vector whose
; elements are all 42. The expected assembly zeroes %edi, %esi, %edx, %ecx,
; %r8d and %r9d for the six pointer arguments and then issues seven
; `pushq $42`, one per vector element, so every element occupies its own
; 8-byte stack slot. The `addq $64` after the call releases those 7 * 8 = 56
; bytes together with the 8 bytes reserved by `subq $8`.
; NOTE(review): `pushq %rax` + `subq $8` + 56 bytes of pushes is 72 bytes,
; which together with the 8-byte return address is a multiple of 16 at the
; callq; presumably the extra 8 bytes exist for that alignment - confirm.
define void @caller() nounwind {
; CHECK-LABEL: caller:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    subq $8, %rsp
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorl %r8d, %r8d
; CHECK-NEXT:    xorl %r9d, %r9d
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    pushq $42
; CHECK-NEXT:    callq callee@PLT
; CHECK-NEXT:    addq $64, %rsp
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  ; Six null pointers, then the <7 x i32> vector of 42s.
  call void @callee(ptr null, ptr null, ptr null, ptr null, ptr null, ptr null, <7 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>)
  ret void
}

; Make sure the stack offsets are correct. The distance between them should
; be 8, not 4.
;
; @callee receives six pointers and a <7 x i32> vector. In the expected
; assembly the seven elements of %arg are loaded from 96(%rsp), 104(%rsp),
; 112(%rsp), 120(%rsp), 128(%rsp), 136(%rsp) and 144(%rsp), i.e. from
; consecutive 8-byte slots. The base offset of 96 is the 8-byte return
; address, plus six 8-byte register pushes, plus the 40 bytes subtracted
; from %rsp.
;
; Element-to-register mapping in the expected assembly:
;   element 0 -> %edi  (96(%rsp)), used directly by the first call to use
;   element 1 -> %r15d (104(%rsp))
;   element 2 -> %ebp  (112(%rsp))
;   element 3 -> %ebx  (120(%rsp))
;   element 4 -> %r12d (128(%rsp))
;   element 5 -> %r14d (136(%rsp))
;   element 6 -> %r13d (144(%rsp))
define void @callee(ptr %p0, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, <7 x i32> %arg) nounwind {
; CHECK-LABEL: callee:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    movl 120(%rsp), %ebx
; CHECK-NEXT:    movd %ebx, %xmm0
; CHECK-NEXT:    movl 112(%rsp), %ebp
; CHECK-NEXT:    movd %ebp, %xmm1
; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movl 104(%rsp), %r15d
; CHECK-NEXT:    movd %r15d, %xmm0
; CHECK-NEXT:    movl 96(%rsp), %edi
; CHECK-NEXT:    movd %edi, %xmm2
; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; CHECK-NEXT:    movl 136(%rsp), %r14d
; CHECK-NEXT:    movd %r14d, %xmm0
; CHECK-NEXT:    movl 128(%rsp), %r12d
; CHECK-NEXT:    movd %r12d, %xmm1
; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    movl 144(%rsp), %r13d
; CHECK-NEXT:    movl %r13d, 36(%rsp)
; CHECK-NEXT:    movq %xmm1, 28(%rsp)
; CHECK-NEXT:    movdqu %xmm2, 12(%rsp)
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %r15d, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %ebp, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %ebx, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %r12d, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %r14d, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    movl %r13d, %edi
; CHECK-NEXT:    callq use@PLT
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
start:
  ; Store the whole vector into a 7 x i32 (28-byte) local. In the expected
  ; assembly this is the movl/movq/movdqu sequence writing 36(%rsp), 28(%rsp)
  ; and 12(%rsp), which covers 4 + 8 + 16 = 28 contiguous bytes.
  %alloca = alloca [7 x i32], align 4
  store <7 x i32> %arg, ptr %alloca, align 4
  ; Hand every element to @use individually, in index order 0..6, so each
  ; incoming stack slot is read and its value is observable.
  %extract0 = extractelement <7 x i32> %arg, i64 0
  call void @use(i32 %extract0)
  %extract1 = extractelement <7 x i32> %arg, i64 1
  call void @use(i32 %extract1)
  %extract2 = extractelement <7 x i32> %arg, i64 2
  call void @use(i32 %extract2)
  %extract3 = extractelement <7 x i32> %arg, i64 3
  call void @use(i32 %extract3)
  %extract4 = extractelement <7 x i32> %arg, i64 4
  call void @use(i32 %extract4)
  %extract5 = extractelement <7 x i32> %arg, i64 5
  call void @use(i32 %extract5)
  %extract6 = extractelement <7 x i32> %arg, i64 6
  call void @use(i32 %extract6)
  ret void
}

; External i32 consumer with no definition in this file; @callee calls it once
; per extracted vector element.
declare void @use(i32)