llvm/llvm/test/CodeGen/AMDGPU/greedy-global-heuristic.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=greedy -verify-machineinstrs -o - %s |FileCheck %s

# This testcase is limited to 24 VGPRs. Each vreg_128 occupies four
# 32-bit VGPRs, so at most 6 vreg_128s can be allocated at the same time.

# This testcase is intended to stress the heuristic in
# RAGreedy::enqueue that switches from local to global priority
# assignment. If an interval is contained in one basic block, the usual
# preference is to allocate registers in instruction order. If the
# estimated live range length is more than twice the number of
# registers in the class, the global heuristic is used instead, which
# increases the priority of the longest live ranges. Because the
# heuristic accounts for the number of reserved registers in vreg_128,
# the allocation ends up avoiding a spill of %0.

--- |

  ; IR-level stub for the machine function of the same name. Its body is
  ; only `unreachable`; the instructions under test are supplied by the
  ; MIR document that follows this IR block.
  define void @use_global_assign() #0 {
  entry:
    unreachable
  }

  ; Requests a minimum and maximum of 10 waves per execution unit. This is
  ; the only attribute on the function, so it is presumably what produces
  ; the VGPR limit described in the header comment.
  attributes #0 = { "amdgpu-waves-per-eu"="10,10" }

...
---
# Machine function paired with the IR function @use_global_assign.
name: use_global_assign
tracksRegLiveness: true
# Sixteen vreg_128 virtual registers are declared, but the body only
# references %0-%5 and %10-%13. Every entry, including %0 itself, names
# %0 as its preferred register.
registers:
  - { id: 0, class: vreg_128, preferred-register: '%0' }
  - { id: 1, class: vreg_128, preferred-register: '%0' }
  - { id: 2, class: vreg_128, preferred-register: '%0' }
  - { id: 3, class: vreg_128, preferred-register: '%0' }
  - { id: 4, class: vreg_128, preferred-register: '%0' }
  - { id: 5, class: vreg_128, preferred-register: '%0' }
  - { id: 6, class: vreg_128, preferred-register: '%0' }
  - { id: 7, class: vreg_128, preferred-register: '%0' }
  - { id: 8, class: vreg_128, preferred-register: '%0' }
  - { id: 9, class: vreg_128, preferred-register: '%0' }
  - { id: 10, class: vreg_128, preferred-register: '%0' }
  - { id: 11, class: vreg_128, preferred-register: '%0' }
  - { id: 12, class: vreg_128, preferred-register: '%0' }
  - { id: 13, class: vreg_128, preferred-register: '%0' }
  - { id: 14, class: vreg_128, preferred-register: '%0' }
  - { id: 15, class: vreg_128, preferred-register: '%0' }

machineFunctionInfo:
  waveLimiter:     true
  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
  # $sgpr32 is the register that appears as the stack-pointer operand on
  # the spill and restore instructions in the expected output.
  stackPtrOffsetReg: '$sgpr32'
body:             |
  ; CHECK-LABEL: name: use_global_assign
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_NOP 0, implicit-def %0
  ; CHECK-NEXT:   S_NOP 0, implicit-def %18
  ; CHECK-NEXT:   SI_SPILL_V128_SAVE %18, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit-def %35
  ; CHECK-NEXT:   S_NOP 0, implicit-def %27
  ; CHECK-NEXT:   S_NOP 0, implicit-def %29
  ; CHECK-NEXT:   S_NOP 0, implicit-def %31
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_NOP 0, implicit %31
  ; CHECK-NEXT:   S_NOP 0, implicit %29
  ; CHECK-NEXT:   S_NOP 0, implicit %27
  ; CHECK-NEXT:   S_NOP 0, implicit %35
  ; CHECK-NEXT:   SI_SPILL_V128_SAVE %35, %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE]]
  ; CHECK-NEXT:   S_NOP 0, implicit %0
  ; CHECK-NEXT:   S_NOP 0, implicit-def %10
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0, implicit %0
  ; CHECK-NEXT:   S_NOP 0, implicit-def %33
  ; CHECK-NEXT:   SI_SPILL_V128_SAVE %33, %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit %10
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0, implicit-def %40
  ; CHECK-NEXT:   SI_SPILL_V128_SAVE %40, %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit %33
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0, implicit-def %42
  ; CHECK-NEXT:   SI_SPILL_V128_SAVE %42, %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit %40
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   S_NOP 0
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_128 = COPY %31
  ; CHECK-NEXT:   S_NOP 0, implicit %31
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vreg_128 = COPY %29
  ; CHECK-NEXT:   S_NOP 0, implicit %29
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vreg_128 = COPY %27
  ; CHECK-NEXT:   S_NOP 0, implicit %27
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]]
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE1]]
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE2]]
  ; CHECK-NEXT:   S_NOP 0, implicit %0
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE3]]
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE4]]
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE5]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   S_NOP 0, implicit %0
  ; CHECK-NEXT:   [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
  ; CHECK-NEXT:   S_NOP 0, implicit [[SI_SPILL_V128_RESTORE6]]
  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY3]]
  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY2]]
  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY1]]
  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY]]
  ; bb.0 defines six vreg_128 values, %0 through %5. Each one is read
  ; again in bb.1 and in bb.2, so all six stay live until the end of the
  ; function.
  bb.0:
    S_NOP 0, implicit-def %0:vreg_128
    S_NOP 0, implicit-def %1:vreg_128
    S_NOP 0, implicit-def %2:vreg_128
    S_NOP 0, implicit-def %3:vreg_128
    S_NOP 0, implicit-def %4:vreg_128
    S_NOP 0, implicit-def %5:vreg_128

  ; bb.1 first reads %5 down to %0, then defines %10, %11, %12 and %13 one
  ; at a time. Each new definition is followed by a read of the previously
  ; defined value, and the definitions are separated by progressively
  ; longer runs of operand-less S_NOPs (7, 10, 14, then 19). The block
  ; ends by reading %5 down to %0 again, followed by %11, %12 and %13.
  ; All of %10-%13 are defined and last used inside this block.
  bb.1:
    S_NOP 0, implicit %5
    S_NOP 0, implicit %4
    S_NOP 0, implicit %3
    S_NOP 0, implicit %2
    S_NOP 0, implicit %1
    S_NOP 0, implicit %0
    S_NOP 0, implicit-def %10:vreg_128
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0, implicit %0
    S_NOP 0, implicit-def %11:vreg_128
    S_NOP 0, implicit %10
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0, implicit-def %12:vreg_128
    S_NOP 0, implicit %11
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0, implicit-def %13:vreg_128
    S_NOP 0, implicit %12
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    S_NOP 0, implicit %5
    S_NOP 0, implicit %4
    S_NOP 0, implicit %3
    S_NOP 0, implicit %2
    S_NOP 0, implicit %1
    S_NOP 0, implicit %0
    S_NOP 0, implicit %11
    S_NOP 0, implicit %12
    S_NOP 0, implicit %13

  ; bb.2 holds the final reads of %0 through %5, in the opposite order
  ; from the reads in bb.1.
  bb.2:
    S_NOP 0, implicit %0
    S_NOP 0, implicit %1
    S_NOP 0, implicit %2
    S_NOP 0, implicit %3
    S_NOP 0, implicit %4
    S_NOP 0, implicit %5

...