; llvm/llvm/test/CodeGen/X86/win_coreclr_chkstk.ll

; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr -verify-machineinstrs | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux         | FileCheck %s -check-prefix=LINUX

; By default, Windows CoreCLR requires an inline prologue stack expansion check
; if 4096 or more bytes are allocated on the stack.

; Prolog stack allocation >= 4096 bytes will require the probe sequence
define i32 @main4k() nounwind {
entry:
; CoreCLR run: the full inline probe sequence is pinned for this function.
; WIN_X64-LABEL: main4k:
; WIN_X64:       # %bb.0:
;
; Compute the prospective stack pointer (%rsp - 4096) in %rdx; cmovb replaces
; it with the zero held in %rcx if the subtraction borrowed.
; WIN_X64:       movl    $4096, %eax
; WIN_X64:       xorq    %rcx, %rcx
; WIN_X64:       movq    %rsp, %rdx
; WIN_X64:       subq    %rax, %rdx
; WIN_X64:       cmovbq  %rcx, %rdx
;
; Compare against the value loaded from %gs:16 (presumably the thread's current
; stack limit -- confirm against the TEB layout) and skip the probe loop when
; the new stack pointer is not below it.
; WIN_X64:       movq    %gs:16, %rcx
; WIN_X64:       cmpq    %rcx, %rdx
; WIN_X64:       jae     .LBB0_3
;
; Probe loop: round the target down to a 4096-byte boundary, then step %rcx
; down one page at a time, storing a byte to each page until it reaches %rdx.
; WIN_X64:       # %bb.1:
; WIN_X64:       andq    $-4096, %rdx
; WIN_X64:       .LBB0_2:
; WIN_X64:       addq    $-4096, %rcx
; WIN_X64:       movb    $0, (%rcx)
; WIN_X64:       cmpq    %rcx, %rdx
; WIN_X64:       jne     .LBB0_2
;
; Actual allocation, function body and epilogue.
; WIN_X64:       .LBB0_3:
; WIN_X64:       subq    %rax, %rsp
; WIN_X64:       xorl    %eax, %eax
; WIN_X64:       addq    $4096, %rsp
; WIN_X64:       retq
;
; Linux run: no %gs-based probe may appear before the return.
; LINUX-LABEL:   main4k:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %buf = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with frame pointer
define i32 @main4k_frame() nounwind "frame-pointer"="all" {
entry:
; With a frame pointer forced on, the CoreCLR run must still load %gs:16 for
; the inline probe; the Linux run must reach its return without doing so.
; WIN_X64-LABEL: main4k_frame:
; WIN_X64:       movq    %gs:16, %rcx
; LINUX-LABEL:   main4k_frame:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %buf = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; The label checks pin the patterns below to this function's output, so a
; probe emitted for a neighbouring function cannot satisfy them.
; The CoreCLR run must load %gs:16 for the inline probe even though two
; integer arguments are live on entry; the Linux run must not.
; WIN_X64-LABEL: main4k_intargs:
; WIN_X64:       movq    %gs:16, %rcx
; LINUX-LABEL:   main4k_intargs:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [4096 x i8]
  %t = add i32 %x, %y
  ret i32 %t
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; The label checks pin the patterns below to this function's output, so a
; probe emitted for a neighbouring function cannot satisfy them.
; The CoreCLR run must load %gs:16 for the inline probe when the arguments are
; floating point; the Linux run must not.
; WIN_X64-LABEL: main4k_fpargs:
; WIN_X64:       movq    %gs:16, %rcx
; LINUX-LABEL:   main4k_fpargs:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with mixed regs
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; The label checks pin the patterns below to this function's output, so a
; probe emitted for a neighbouring function cannot satisfy them.
; The CoreCLR run must load %gs:16 for the inline probe with one floating-point
; and one integer argument; the Linux run must not.
; WIN_X64-LABEL: main4k_mixargs:
; WIN_X64:       movq    %gs:16, %rcx
; LINUX-LABEL:   main4k_mixargs:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [4096 x i8]
  ret i32 %y
}

; Make sure we don't emit the probe for a smaller prolog stack allocation.
define i32 @main128() nounwind {
entry:
; The label checks are essential here: without them the negative check's
; window ends at the first return after the previous match, which belongs to
; an earlier function, and this function's output would never be inspected.
; A 128-byte frame must not produce the %gs:16 load on either run.
; WIN_X64-LABEL: main128:
; WIN_X64-NOT:   movq    %gs:16, %rcx
; WIN_X64:       retq
; LINUX-LABEL:   main128:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [128 x i8]
  ret i32 0
}

; Make sure we don't emit the probe sequence when not targeting Windows, even
; if the function itself uses the Win64 calling convention.
define win64cc i32 @main4k_win64() nounwind {
entry:
; The label checks pin the patterns below to this function's output, so a
; probe emitted for a neighbouring function cannot satisfy them.
; The CoreCLR run still loads %gs:16 for the inline probe; the Linux run must
; not, despite the win64cc calling convention on this definition.
; WIN_X64-LABEL: main4k_win64:
; WIN_X64:       movq    %gs:16, %rcx
; LINUX-LABEL:   main4k_win64:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [4096 x i8]
  ret i32 0
}

declare i32 @bar(ptr) nounwind

; Within-body inline probe expansion
define win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; The label checks pin the patterns below to this function's output.
; The fixed 1024-byte alloca is passed to the first call to bar; the
; variable-sized alloca that follows is what the CoreCLR run must probe, so
; the %gs:16 load is expected between the two calls. Any register may receive
; the loaded value, hence the anonymous regex (no capture is needed because
; the register is never referenced again).
; WIN_X64-LABEL: main4k_alloca:
; WIN_X64:       callq   bar
; WIN_X64:       movq    %gs:16, {{%r.*}}
; WIN_X64:       callq   bar
; The Linux run must not load %gs:16 between the two calls.
; LINUX-LABEL:   main4k_alloca:
; LINUX:         callq   bar
; LINUX-NOT:     movq    %gs:16, {{%r.*}}
; LINUX:         callq   bar
  %a = alloca i8, i64 1024
  %ra = call i32 @bar(ptr %a) nounwind
  %b = alloca i8, i64 %n
  %rb = call i32 @bar(ptr %b) nounwind
  %r = add i32 %ra, %rb
  ret i32 %r
}

; Influence of stack-probe-size attribute
; Note this is not exposed in coreclr
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; The label checks are essential here: without them the negative check's
; window ends at the first return after the previous match, which belongs to
; the preceding function, and this function's output would never be inspected.
; With "stack-probe-size"="8192" the 4096-byte frame must not produce the
; %gs:16 load on either run.
; WIN_X64-LABEL: test_probe_size:
; WIN_X64-NOT:   movq    %gs:16, %rcx
; WIN_X64:       retq
; LINUX-LABEL:   test_probe_size:
; LINUX-NOT:     movq    %gs:16, %rcx
; LINUX:         retq
  %a = alloca [4096 x i8]
  ret i32 0
}