# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s
--- |
; Embedded LLVM IR module. It only provides the symbols that the MIR
; functions further down are attached to or refer to.
;
; @callee is the target named by the SI_PC_ADD_REL_OFFSET and
; SI_CS_CHAIN_TC_W32 instructions in the MIR bodies.
declare amdgpu_cs_chain void @callee()
; @gfx_callee is declared here but is not referenced by any of the MIR
; bodies shown in this file.
declare amdgpu_gfx void @gfx_callee()
; One IR stub per MIR function; each stub is just `ret void`, the code under
; test is the MIR body with the matching name.
define amdgpu_cs_chain_preserve void @preserve_active_lanes_above_args() {ret void}
define amdgpu_cs_chain_preserve void @preserve_all_lanes_wwm_above_args() {ret void}
define amdgpu_cs_chain_preserve void @dont_preserve_args() {ret void}
define amdgpu_cs_chain_preserve void @preserve_inactive_lanes_wwm_args() {ret void}
define amdgpu_cs_chain_preserve void @dont_preserve_if_no_chain_calls() {ret void}
define amdgpu_cs_chain_preserve void @dont_preserve_v0_v7() {ret void}
define amdgpu_cs_chain_preserve void @dont_preserve_sgpr() {ret void}
...
---
# NOTE: Since we don't know what the args are, we rely on the fact that we can't
# call llvm.amdgcn.cs.chain with more parameters than we received - so anything
# that is used by the SI_CS_CHAIN_TC_W32 is assumed to have been an arg and therefore
# not preserved.
---
# The body clobbers $vgpr10, which is neither an input livein nor an operand
# of the chain call (the call only uses $sgpr0, $vgpr8 and $vgpr9).
# The expected output stores $vgpr10 to scratch as the first instruction and
# reloads it immediately before SI_CS_CHAIN_TC_W32. Neither the store nor the
# load is wrapped in any exec manipulation, and $vgpr10 is added to the
# block liveins.
name: preserve_active_lanes_above_args
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
body: |
bb.0:
liveins: $sgpr0, $vgpr8, $vgpr9
; GCN-LABEL: name: preserve_active_lanes_above_args
; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
; Input MIR handed to the pass (the assertions above describe its output).
; $vgpr10 is only a temporary here: it is written and then copied into $vgpr8.
renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
$vgpr8 = COPY renamable killed $vgpr10
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# $vgpr10 is listed in wwmReservedRegs. The input uses it both as the lane
# holder for the $sgpr35 spill/restore and as a temporary that is copied into
# $vgpr8. It is not an operand of the chain call.
# The expected output shows every $vgpr10 occurrence as $vgpr0, drops
# $vgpr10 from the block liveins, and contains no scratch store/load.
# NOTE(review): presumably the pass moves the WWM register into the low VGPR
# range that does not have to be preserved, which would explain why no
# save/restore shows up - confirm against the AMDGPU frame lowering code.
name: preserve_all_lanes_wwm_above_args
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- '$vgpr10'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: $vgpr8 = COPY killed $vgpr0
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
; Input MIR handed to the pass. Lane 0 of $vgpr10 holds $sgpr35 while the
; SGPR is overwritten with 5, then $sgpr35 is read back from that lane.
$vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
$vgpr10 = V_MOV_B32_e32 10, implicit $exec
$vgpr8 = COPY killed $vgpr10
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# The body overwrites $vgpr8, which is an implicit operand of the chain call
# and therefore treated as an argument register (see the note at the top of
# the file). The expected output is identical to the input: same liveins,
# same instructions, and no save/restore for $vgpr8.
name: dont_preserve_args
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
body: |
bb.0:
liveins: $sgpr0, $vgpr8, $vgpr9
; GCN-LABEL: name: dont_preserve_args
; GCN: liveins: $sgpr0, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
; Input MIR handed to the pass.
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# $vgpr9 is listed in wwmReservedRegs and is also an implicit operand of the
# chain call. The body itself only writes $vgpr8 and $sgpr35.
# The expected output drops $vgpr9 from the block liveins, shows the last
# implicit operand of the chain call as $vgpr0 instead of $vgpr9, and
# contains no scratch store/load. The `renamable` flag on the
# SI_SPILL_S32_TO_VGPR def is also absent from the expected output.
# NOTE(review): the $vgpr9 -> $vgpr0 change looks like the same WWM register
# relocation seen in preserve_all_lanes_wwm_above_args - confirm against the
# AMDGPU frame lowering code.
name: preserve_inactive_lanes_wwm_args
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- '$vgpr9'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0
; Input MIR handed to the pass. $vgpr8 (a chain call operand) is used as the
; lane holder for the $sgpr35 spill and is then overwritten with 10.
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# Unlike the other functions in this file, hasTailCall is false and the body
# ends in S_ENDPGM instead of SI_CS_CHAIN_TC_W32.
# The body writes $vgpr8, $vgpr9 and $vgpr10, and $vgpr9 is listed in
# wwmReservedRegs. Even so, the expected output keeps the liveins and all
# register numbers as in the input and contains no scratch store/load. The
# only visible difference is that the `renamable` flag on the
# SI_SPILL_S32_TO_VGPR def is absent.
name: dont_preserve_if_no_chain_calls
tracksRegLiveness: true
frameInfo:
hasTailCall: false
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- '$vgpr9'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
; GCN-LABEL: name: dont_preserve_if_no_chain_calls
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: $vgpr9 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 30, implicit $exec
; GCN-NEXT: S_ENDPGM 0
; Input MIR handed to the pass. $vgpr10 is written although it is not a
; livein of this block.
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
$vgpr9 = V_MOV_B32_e32 20, implicit $exec
$vgpr10 = V_MOV_B32_e32 30, implicit $exec
S_ENDPGM 0
...
---
# The body writes $vgpr0 and $vgpr7, neither of which is an operand of the
# chain call, and then copies them into the chain call operands $vgpr8 and
# $vgpr9. $vgpr1 is listed in wwmReservedRegs but is not used by the body.
# The expected output keeps the liveins and register numbers as in the input
# and contains no scratch store/load for $vgpr0 or $vgpr7. The only visible
# difference is that the `renamable` flag on the SI_SPILL_S32_TO_VGPR def is
# absent.
name: dont_preserve_v0_v7
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- '$vgpr1'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
; GCN-LABEL: name: dont_preserve_v0_v7
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr0
; GCN-NEXT: renamable $vgpr9 = COPY killed renamable $vgpr7
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
; Input MIR handed to the pass. $vgpr0 first serves as the lane holder for
; the $sgpr35 spill, then both $vgpr0 and $vgpr7 are used as plain
; temporaries.
renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
renamable $vgpr7 = V_MOV_B32_e32 16, implicit $exec
renamable $vgpr8 = COPY killed renamable $vgpr0
renamable $vgpr9 = COPY killed renamable $vgpr7
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...
---
# SGPR counterpart of the VGPR cases: the body writes $sgpr1, which is neither
# a livein nor an operand of the chain call, and then overwrites $sgpr0.
# The expected output is identical to the input: same liveins, same
# instructions, and no save/restore of any SGPR.
name: dont_preserve_sgpr
tracksRegLiveness: true
frameInfo:
hasTailCall: true
machineFunctionInfo:
stackPtrOffsetReg: '$sgpr32'
isChainFunction: true
returnsVoid: true
body: |
bb.0:
liveins: $sgpr0
; GCN-LABEL: name: dont_preserve_sgpr
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
; GCN-NEXT: $sgpr0 = COPY killed renamable $sgpr1
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
; Input MIR handed to the pass. $sgpr1 is a temporary holding
; $sgpr0 + $sgpr0, which is then copied back into $sgpr0.
renamable $sgpr1 = S_ADD_I32 killed renamable $sgpr0, renamable $sgpr0, implicit-def dead $scc
$sgpr0 = COPY killed renamable $sgpr1
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0
...