# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -verify-machineinstrs -o - %s |FileCheck %s
# This testcase is limited to 24 VGPRs, so at most 6 vreg_128s can be
# allocated at the same time.
# It is intended to stress the heuristic in RAGreedy::enqueue that
# switches from local to global priority. If an interval is contained in
# one basic block, the usual preference is to allocate registers in
# instruction order. If the estimated live range length is more than
# twice the number of registers in the class, the global heuristic is
# used instead, which increases the priority of the longest live ranges.
# Because the heuristic accounts for the number of reserved registers in
# vreg_128, it ends up avoiding a spill of %0.
--- |
; IR stub for the MIR function of the same name below. Its body is just
; `unreachable`; it exists to attach attribute group #0 to the function.
define void @use_global_assign() #0 {
entry:
unreachable
}
; Requests a waves-per-EU range of 10 to 10 for the function.
; NOTE(review): presumably this is what produces the 24-VGPR budget mentioned
; in the file header -- confirm against the AMDGPU occupancy rules for gfx900.
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
...
---
# MIR function that this test feeds to the greedy register allocator
# (-run-pass=greedy on the llc command line at the top of the file).
name: use_global_assign
tracksRegLiveness: true
# Every virtual register is declared with class vreg_128. The input body only
# references ids 0-5 and 10-13; ids 6-9, 14 and 15 are declared but unused.
# NOTE(review): every entry, including id 0 itself, names '%0' as its
# preferred register -- confirm this hint is intentional.
registers:
- { id: 0, class: vreg_128, preferred-register: '%0' }
- { id: 1, class: vreg_128, preferred-register: '%0' }
- { id: 2, class: vreg_128, preferred-register: '%0' }
- { id: 3, class: vreg_128, preferred-register: '%0' }
- { id: 4, class: vreg_128, preferred-register: '%0' }
- { id: 5, class: vreg_128, preferred-register: '%0' }
- { id: 6, class: vreg_128, preferred-register: '%0' }
- { id: 7, class: vreg_128, preferred-register: '%0' }
- { id: 8, class: vreg_128, preferred-register: '%0' }
- { id: 9, class: vreg_128, preferred-register: '%0' }
- { id: 10, class: vreg_128, preferred-register: '%0' }
- { id: 11, class: vreg_128, preferred-register: '%0' }
- { id: 12, class: vreg_128, preferred-register: '%0' }
- { id: 13, class: vreg_128, preferred-register: '%0' }
- { id: 14, class: vreg_128, preferred-register: '%0' }
- { id: 15, class: vreg_128, preferred-register: '%0' }
machineFunctionInfo:
waveLimiter: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
# $sgpr32 is the register operand that the spill and restore instructions in
# the expected output below use to address their stack slots.
stackPtrOffsetReg: '$sgpr32'
body: |
; CHECK-LABEL: name: use_global_assign
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit-def %0
; CHECK-NEXT: S_NOP 0, implicit-def %18
; CHECK-NEXT: SI_SPILL_V128_SAVE %18, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit-def %35
; CHECK-NEXT: S_NOP 0, implicit-def %27
; CHECK-NEXT: S_NOP 0, implicit-def %29
; CHECK-NEXT: S_NOP 0, implicit-def %31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_NOP 0, implicit %31
; CHECK-NEXT: S_NOP 0, implicit %29
; CHECK-NEXT: S_NOP 0, implicit %27
; CHECK-NEXT: S_NOP 0, implicit %35
; CHECK-NEXT: SI_SPILL_V128_SAVE %35, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE]]
; CHECK-NEXT: S_NOP 0, implicit %0
; CHECK-NEXT: S_NOP 0, implicit-def %10
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0, implicit %0
; CHECK-NEXT: S_NOP 0, implicit-def %33
; CHECK-NEXT: SI_SPILL_V128_SAVE %33, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit %10
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0, implicit-def %40
; CHECK-NEXT: SI_SPILL_V128_SAVE %40, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit %33
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0, implicit-def %42
; CHECK-NEXT: SI_SPILL_V128_SAVE %42, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit %40
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %31
; CHECK-NEXT: S_NOP 0, implicit %31
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY %29
; CHECK-NEXT: S_NOP 0, implicit %29
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128 = COPY %27
; CHECK-NEXT: S_NOP 0, implicit %27
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]]
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE1]]
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE2]]
; CHECK-NEXT: S_NOP 0, implicit %0
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE3]]
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE4]]
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE5]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: S_NOP 0, implicit %0
; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V128_RESTORE6]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY3]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY2]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY1]]
; CHECK-NEXT: S_NOP 0, implicit [[COPY]]
; bb.0: define the six vreg_128 values %0-%5. None of them is used in this
; block; all six are used again in bb.1 and in bb.2.
bb.0:
S_NOP 0, implicit-def %0:vreg_128
S_NOP 0, implicit-def %1:vreg_128
S_NOP 0, implicit-def %2:vreg_128
S_NOP 0, implicit-def %3:vreg_128
S_NOP 0, implicit-def %4:vreg_128
S_NOP 0, implicit-def %5:vreg_128
; bb.1: uses %0-%5 at the top and at the bottom of the block, and in between
; defines four values (%10-%13) that are only referenced inside this block.
; The definitions are separated by runs of operand-less S_NOPs of growing
; length (7, 10, 14, 19).
; NOTE(review): presumably the filler stretches the live ranges so that the
; length estimate described in the file header is exceeded -- confirm.
bb.1:
; First group of uses of the values defined in bb.0, in reverse order.
S_NOP 0, implicit %5
S_NOP 0, implicit %4
S_NOP 0, implicit %3
S_NOP 0, implicit %2
S_NOP 0, implicit %1
S_NOP 0, implicit %0
; %10: defined here, single use right after %11 is defined.
S_NOP 0, implicit-def %10:vreg_128
; 7 filler instructions.
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
; Extra mid-block use of %0; no other bb.0 value is used between the first
; and last groups of uses.
S_NOP 0, implicit %0
; %11 is defined before the use of %10, so the two ranges overlap.
S_NOP 0, implicit-def %11:vreg_128
S_NOP 0, implicit %10
; 10 filler instructions.
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
; %12 is defined before this use of %11; %11 is used again at the block end.
S_NOP 0, implicit-def %12:vreg_128
S_NOP 0, implicit %11
; 14 filler instructions.
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
; %13 is defined before this use of %12; %12 is used again at the block end.
S_NOP 0, implicit-def %13:vreg_128
S_NOP 0, implicit %12
; 19 filler instructions.
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
; Second group of uses of %0-%5, again in reverse order.
S_NOP 0, implicit %5
S_NOP 0, implicit %4
S_NOP 0, implicit %3
S_NOP 0, implicit %2
S_NOP 0, implicit %1
S_NOP 0, implicit %0
; Final uses of %11, %12 and %13, so all three are live at the same time as
; %0-%5 at the end of the block.
S_NOP 0, implicit %11
S_NOP 0, implicit %12
S_NOP 0, implicit %13
; bb.2: last uses of %0-%5. Because they are used here, all six values
; defined in bb.0 are live across the whole of bb.1.
bb.2:
S_NOP 0, implicit %0
S_NOP 0, implicit %1
S_NOP 0, implicit %2
S_NOP 0, implicit %3
S_NOP 0, implicit %4
S_NOP 0, implicit %5
...