# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# The COPY that moves the return value to VGPR0 should not be removed during machine-cp. The spill restore of the same register that follows
# is meant to reload only its inactive lanes. Marking the register itself as the tied-op in the spill reload prevents the undesired optimization.
---
name: wwm_scratch_reg_spill_reload_of_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr0 is both WWM-reserved and the register read by SI_RETURN, so the
  # checked reload of $vgpr0 carries a tied implicit use of $vgpr0.
  wwmReservedRegs: ['$vgpr0']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr0 = IMPLICIT_DEF
    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...
# The reload of vgpr0 requires the tied-op because vgpr0 is a subregister of the outgoing tuple register vgpr0_vgpr1.
# The reload of vgpr2 doesn't need the tied-op because vgpr2 isn't holding any return value.
---
name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr0 is part of the $vgpr0_vgpr1 tuple read by SI_RETURN; $vgpr2 is not.
  # Only the checked reload of $vgpr0 carries a tied implicit use.
  wwmReservedRegs: ['$vgpr0', '$vgpr2']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $sgpr21, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
    ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr0 = IMPLICIT_DEF
    $vgpr2 = IMPLICIT_DEF
    $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...
# The tied-op is not required in the spill reload of vgpr2 because vgpr2 is not part of the outgoing tuple register vgpr0_vgpr1.
---
name: wwm_scratch_reg_spill_reload_different_outgoing_reg
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr2 is WWM-reserved but is not part of the $vgpr0_vgpr1 tuple read by
  # SI_RETURN, so its checked reload has no tied implicit use.
  wwmReservedRegs: ['$vgpr2']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
    ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
    $vgpr2 = IMPLICIT_DEF
    $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
    $vgpr0 = COPY $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0_vgpr1
...
# The tied-op is not required in the spill reload of vgpr40, which is in the CSR range.
---
name: wwm_csr_spill_reload
tracksRegLiveness: true
machineFunctionInfo:
  # $vgpr40 is WWM-reserved and is not read by SI_RETURN. Its checked
  # save/restore uses S_OR_SAVEEXEC_B64 and a reload without a tied implicit use.
  wwmReservedRegs: ['$vgpr40']
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
  frameOffsetReg: '$sgpr33'
body: |
  bb.0:
    liveins: $sgpr20, $vgpr1
    ; GCN-LABEL: name: wwm_csr_spill_reload
    ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
    ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
    ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
    ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
    ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
    ; GCN-NEXT: SI_RETURN implicit $vgpr0
    $vgpr40 = IMPLICIT_DEF
    $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
    $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
    $vgpr0 = COPY killed $vgpr1, implicit $exec
    SI_RETURN implicit $vgpr0
...