llvm/llvm/test/CodeGen/AMDGPU/si-init-whole-wave.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o -  %s | FileCheck %s

---
# Test that we don't do silly things when there is no whole wave mode in the
# shader (aka bb.1).
#
name:            test_no_wwm
alignment:       1
exposesReturnsTwice: false
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: test_no_wwm
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc
  ; CHECK-NEXT:   $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %37:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %37:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    %2:sreg_32 = COPY $sgpr0
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...

---
# Test that we handle WWM in the shader correctly.
#
name:            test_wwm_bb1
alignment:       1
exposesReturnsTwice: false
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: test_wwm_bb1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec
  ; CHECK-NEXT:   [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
  ; CHECK-NEXT:   [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
  ; CHECK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec
  ; CHECK-NEXT:   $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
  ; CHECK-NEXT:   early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc
  ; CHECK-NEXT:   $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT:   $vgpr9 = COPY [[COPY3]]
  ; CHECK-NEXT:   $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT:   SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
  bb.0:
   successors: %bb.1, %bb.2
   liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
   %9:sreg_32 = COPY $sgpr0
   undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
   %1.sub1:ccr_sgpr_64 = COPY $sgpr2
   %40:vgpr_32 = COPY $vgpr9
   %36:vgpr_32 = COPY $vgpr8
   %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
   %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
   %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
   $exec_lo = S_MOV_B32_term %38:sreg_32
   S_CBRANCH_EXECZ %bb.2, implicit $exec
   S_BRANCH %bb.1

  bb.1:
   %36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec
   %20:sreg_32_xm0_xexec = IMPLICIT_DEF
   %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc
   %18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec
   %40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec

  bb.2:
   $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
   $vgpr8 = COPY %40:vgpr_32
   $vgpr9 = COPY %36:vgpr_32
   $sgpr0 = COPY %9:sreg_32
   SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...