; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 -O3 -verify-machineinstrs | FileCheck %s
;
; Test stack clash protection probing for static allocas.
; Small: one probe.
; The whole 560-byte frame is allocated with a single stack-pointer adjustment
; and touched once: the cg (a compare that reads memory) directly after the
; aghi is the probe, aimed at the highest doubleword of the new area
; (560 - 8 = 552).
; 560 = 400 bytes for the alloca + 160 bytes below it; %a is addressed at
; 160(%r15) in the expected output.
define i32 @fun0() #0 {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -560
; CHECK-NEXT: .cfi_def_cfa_offset 720
; CHECK-NEXT: cg %r0, 552(%r15)
; CHECK-NEXT: mvhi 552(%r15), 1
; CHECK-NEXT: l %r2, 160(%r15)
; CHECK-NEXT: aghi %r15, 560
; CHECK-NEXT: br %r14
; 100 x i32 = 400 bytes of stack.
%a = alloca i32, i64 100
; Element 98 lies 392 bytes into %a, i.e. at 160 + 392 = 552(%r15).
%b = getelementptr inbounds i32, ptr %a, i64 98
; Volatile accesses may not be deleted, so the stack object stays in use at -O3.
store volatile i32 1, ptr %b
%c = load volatile i32, ptr %a
ret i32 %c
}
; Medium: two probes.
; The 8176-byte frame is split into two straight-line steps, 4096 and then the
; remaining 4080 bytes. Each aghi is followed by its own cg probe at
; (step size - 8) from the new stack pointer, and by a CFA offset update.
; 8176 = 8000 bytes for the alloca + 176 bytes below it; %a is addressed at
; 176(%r15) in the expected output.
define i32 @fun1() #0 {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -4080
; CHECK-NEXT: .cfi_def_cfa_offset 8336
; CHECK-NEXT: cg %r0, 4072(%r15)
; CHECK-NEXT: mvhi 976(%r15), 1
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: aghi %r15, 8176
; CHECK-NEXT: br %r14
; 2000 x i32 = 8000 bytes of stack.
%a = alloca i32, i64 2000
; Element 200 lies 800 bytes into %a, i.e. at 176 + 800 = 976(%r15).
%b = getelementptr inbounds i32, ptr %a, i64 200
; Volatile accesses may not be deleted, so the stack object stays in use at -O3.
store volatile i32 1, ptr %b
%c = load volatile i32, ptr %a
ret i32 %c
}
; Large: Use a loop to allocate and probe in steps.
; Frame size is 72176 bytes (72000 for the alloca + 176 below it).
; Expected shape of the prologue:
;   - %r0 is set to the incoming stack pointer minus 69632 (17 * 4096) and is
;     the CFA register while the loop runs;
;   - the loop lowers %r15 by 4096, probes at 4088(%r15), and repeats while
;     %r15 is still (unsigned) above %r0;
;   - the remaining 2544 bytes are then allocated and probed once more,
;     outside the loop.
; 69632 + 2544 = 72176, which is what the epilogue adds back.
define i32 @fun2() #0 {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: lgr %r0, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r0
; CHECK-NEXT: agfi %r0, -69632
; CHECK-NEXT: .cfi_def_cfa_offset 69792
; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -2544
; CHECK-NEXT: .cfi_def_cfa_offset 72336
; CHECK-NEXT: cg %r0, 2536(%r15)
; CHECK-NEXT: lhi %r0, 1
; CHECK-NEXT: mvhi 568(%r15), 1
; CHECK-NEXT: sty %r0, 28968(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: agfi %r15, 72176
; CHECK-NEXT: br %r14
; 18000 x i32 = 72000 bytes of stack.
%a = alloca i32, i64 18000
; Element 98 lies at 176 + 392 = 568(%r15); stored with mvhi.
%b0 = getelementptr inbounds i32, ptr %a, i64 98
; Element 7198 lies at 176 + 28792 = 28968(%r15); stored with lhi + sty.
; NOTE(review): presumably this displacement is too large for mvhi, hence the
; register + sty form - confirm against the instruction formats.
%b1 = getelementptr inbounds i32, ptr %a, i64 7198
; Volatile accesses may not be deleted, so the stack object stays in use at -O3.
store volatile i32 1, ptr %b0
store volatile i32 1, ptr %b1
%c = load volatile i32, ptr %a
ret i32 %c
}
; Ends evenly on the step so no remainder needed.
; The frame is 28672 bytes, exactly 7 * 4096, so the probing loop covers all of
; it: after the loop there is no extra aghi/cg pair, only the switch of the CFA
; register back to %r15.
define void @fun3() #0 {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r0, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r0
; CHECK-NEXT: aghi %r0, -28672
; CHECK-NEXT: .cfi_def_cfa_offset 28832
; CHECK-NEXT: .LBB3_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 28672
; CHECK-NEXT: br %r14
entry:
; 7122 x i32 = 28488 bytes; never accessed, it only determines the frame size.
%stack = alloca [7122 x i32], align 4
; %i is the slot addressed as 180(%r15) in the expected output.
%i = alloca i32, align 4
; Volatile store and load keep %i in use; the loaded value itself is unused.
store volatile i32 0, ptr %i, align 4
%i.0.i.0.6 = load volatile i32, ptr %i, align 4
ret void
}
; Loop with bigger step.
; "stack-probe-size"="8192" changes the loop step from 4096 to 8192.
; The 32184-byte frame is covered by 3 loop iterations (3 * 8192 = 24576)
; followed by one remainder step of 7608 bytes with its own probe at
; 7600(%r15).
define void @fun4() #0 "stack-probe-size"="8192" {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r0, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r0
; CHECK-NEXT: aghi %r0, -24576
; CHECK-NEXT: .cfi_def_cfa_offset 24736
; CHECK-NEXT: .LBB4_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8192
; CHECK-NEXT: cg %r0, 8184(%r15)
; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: aghi %r15, -7608
; CHECK-NEXT: .cfi_def_cfa_offset 32344
; CHECK-NEXT: cg %r0, 7600(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 32184
; CHECK-NEXT: br %r14
entry:
; 8000 x i32 = 32000 bytes; never accessed, it only determines the frame size.
%stack = alloca [8000 x i32], align 4
; %i is the slot addressed as 180(%r15) in the expected output.
%i = alloca i32, align 4
; Volatile store and load keep %i in use; the loaded value itself is unused.
store volatile i32 0, ptr %i, align 4
%i.0.i.0.6 = load volatile i32, ptr %i, align 4
ret void
}
; Probe size should be modulo stack alignment.
; That is, a requested probe size that is not a multiple of the stack alignment
; is reduced to one: "stack-probe-size"="4100" yields a first step of 4096 in
; the expected output, not 4100.
; The 4184-byte frame is then allocated as 4096 + 88, each step with its own
; probe, without a loop.
define void @fun5() #0 "stack-probe-size"="4100" {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: .cfi_def_cfa_offset 4256
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: aghi %r15, -88
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: cg %r0, 80(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
; 1000 x i32 = 4000 bytes; never accessed, it only determines the frame size.
%stack = alloca [1000 x i32], align 4
; %i is the slot addressed as 180(%r15) in the expected output.
%i = alloca i32, align 4
; Volatile store and load keep %i in use; the loaded value itself is unused.
store volatile i32 0, ptr %i, align 4
%i.0.i.0.6 = load volatile i32, ptr %i, align 4
ret void
}
; The minimum probe size is the stack alignment.
; "stack-probe-size"="5" is below that minimum, so the expected loop steps by 8
; bytes and probes at 0(%r15) on every iteration.
; The 4184-byte frame is a multiple of 8, so the loop covers it completely and
; no remainder step follows.
define void @fun6() #0 "stack-probe-size"="5" {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r0, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r0
; CHECK-NEXT: aghi %r0, -4184
; CHECK-NEXT: .cfi_def_cfa_offset 4344
; CHECK-NEXT: .LBB6_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -8
; CHECK-NEXT: cg %r0, 0(%r15)
; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 4184
; CHECK-NEXT: br %r14
entry:
; 1000 x i32 = 4000 bytes; never accessed, it only determines the frame size.
%stack = alloca [1000 x i32], align 4
; %i is the slot addressed as 180(%r15) in the expected output.
%i = alloca i32, align 4
; Volatile store and load keep %i in use; the loaded value itself is unused.
store volatile i32 0, ptr %i, align 4
%i.0.i.0.6 = load volatile i32, ptr %i, align 4
ret void
}
; Small with a natural probe (STMG) - needs no extra probe.
; Because of the call to @foo, %r14 and %r15 are saved with an stmg at
; 112(%r15) before the stack pointer is lowered. The expected output has no cg
; after the aghi: the register save already touches the stack.
; Frame size is 3976 bytes (3800 for the alloca + 176 below it); the saved
; registers are reloaded from 3976 + 112 = 4088(%r15).
define i32 @fun7() #0 {
; CHECK-LABEL: fun7:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3976
; CHECK-NEXT: .cfi_def_cfa_offset 4136
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 568(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4088(%r15)
; CHECK-NEXT: br %r14
; The call is what makes the prologue save %r14/%r15.
%v = call i32 @foo()
; 950 x i32 = 3800 bytes of stack.
%a = alloca i32, i64 950
; Element 98 lies 392 bytes into %a, i.e. at 176 + 392 = 568(%r15).
%b = getelementptr inbounds i32, ptr %a, i64 98
; Volatile accesses may not be deleted, so the stack object stays in use at -O3.
store volatile i32 %v, ptr %b
%c = load volatile i32, ptr %a
ret i32 %c
}
; Medium with an STMG - still needs probing.
; Same shape as fun7 but with two more i32 elements, giving a 3984-byte frame
; (3808 for the alloca + 176 below it). Here the expected output does contain a
; cg probe after the aghi, in addition to the stmg.
; NOTE(review): with this frame size the stmg save slot ends up at
; 3984 + 112 = 4096(%r15), versus 4088(%r15) in fun7 - presumably this is the
; boundary at which the register save no longer counts as a probe; confirm
; against the SystemZ frame lowering code.
define i32 @fun8() #0 {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -3984
; CHECK-NEXT: .cfi_def_cfa_offset 4144
; CHECK-NEXT: cg %r0, 3976(%r15)
; CHECK-NEXT: brasl %r14, foo@PLT
; CHECK-NEXT: st %r2, 976(%r15)
; CHECK-NEXT: l %r2, 176(%r15)
; CHECK-NEXT: lmg %r14, %r15, 4096(%r15)
; CHECK-NEXT: br %r14
; The call is what makes the prologue save %r14/%r15.
%v = call i32 @foo()
; 952 x i32 = 3808 bytes of stack.
%a = alloca i32, i64 952
; Element 200 lies 800 bytes into %a, i.e. at 176 + 800 = 976(%r15).
%b = getelementptr inbounds i32, ptr %a, i64 200
; Volatile accesses may not be deleted, so the stack object stays in use at -O3.
store volatile i32 %v, ptr %b
%c = load volatile i32, ptr %a
ret i32 %c
}
; Probing loop combined with the "backchain" attribute.
; The IR body and frame size (28672 = 7 * 4096, loop only, no remainder) are the
; same as in fun3. The expected output differs from fun3 in two instructions:
; the incoming stack pointer is copied to %r1 before the loop, and stored at
; 0(%r15) once the loop has finished allocating the frame.
define void @fun9() #0 "backchain" {
; CHECK-LABEL: fun9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lgr %r1, %r15
; CHECK-NEXT: lgr %r0, %r15
; CHECK-NEXT: .cfi_def_cfa_register %r0
; CHECK-NEXT: aghi %r0, -28672
; CHECK-NEXT: .cfi_def_cfa_offset 28832
; CHECK-NEXT: .LBB9_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: aghi %r15, -4096
; CHECK-NEXT: cg %r0, 4088(%r15)
; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: .cfi_def_cfa_register %r15
; CHECK-NEXT: stg %r1, 0(%r15)
; CHECK-NEXT: mvhi 180(%r15), 0
; CHECK-NEXT: l %r0, 180(%r15)
; CHECK-NEXT: aghi %r15, 28672
; CHECK-NEXT: br %r14
entry:
; 7122 x i32 = 28488 bytes; never accessed, it only determines the frame size.
%stack = alloca [7122 x i32], align 4
; %i is the slot addressed as 180(%r15) in the expected output.
%i = alloca i32, align 4
; Volatile store and load keep %i in use; the loaded value itself is unused.
store volatile i32 0, ptr %i, align 4
%i.0.i.0.6 = load volatile i32, ptr %i, align 4
ret void
}
; External function called by fun7 and fun8; it has no body in this file.
declare i32 @foo()
; Attribute group #0 is attached to every function defined in this file.
; "probe-stack"="inline-asm" is the setting under which the inline probes in the
; expected output are produced.
attributes #0 = { "probe-stack"="inline-asm" }