# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
---
# Test that si-wqm leaves the shader body (bb.1) alone when it contains no
# whole wave mode (WWM) code.
#
# The only change the CHECK lines expect is in bb.0: SI_INIT_WHOLE_WAVE is
# replaced by an S_OR_SAVEEXEC_B32 -1 at the top of the block, and its result
# feeds the S_AND_B32 whose value is then written to $exec_lo. bb.1 and bb.2
# are expected to come out of the pass unchanged.
#
name: test_no_wwm
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test_no_wwm
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc
  ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %37:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %37:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    %2:sreg_32 = COPY $sgpr0
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
...
---
# Test that si-wqm handles WWM code inside the shader body (bb.1).
#
# Expected changes, as pinned by the CHECK lines:
#  - bb.0: SI_INIT_WHOLE_WAVE is replaced by an S_OR_SAVEEXEC_B32 -1 at the
#    top of the block, and its result feeds the S_AND_B32.
#  - bb.1: ENTER_STRICT_WWM -1 is inserted after the V_ADD_U32_e64 24 and
#    EXIT_STRICT_WWM after the V_ADD_U32_e64 42, so the IMPLICIT_DEF,
#    V_SET_INACTIVE_B32 and the second add sit between them. The STRICT_WWM
#    pseudo is replaced by an early-clobber V_MOV_B32_e32 placed after the
#    exit.
#  - bb.2 is expected to come out of the pass unchanged.
#
name: test_wwm_bb1
alignment: 1
exposesReturnsTwice: false
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test_wwm_bb1
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec
  ; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
  ; CHECK-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc
  ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec
  ; CHECK-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]]
  ; CHECK-NEXT: early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc
  ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]]
  ; CHECK-NEXT: $vgpr9 = COPY [[COPY3]]
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY]]
  ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9
    %9:sreg_32 = COPY $sgpr0
    undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1
    %1.sub1:ccr_sgpr_64 = COPY $sgpr2
    %40:vgpr_32 = COPY $vgpr9
    %36:vgpr_32 = COPY $vgpr8
    %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec
    %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo
    %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %38:sreg_32
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec
    %20:sreg_32_xm0_xexec = IMPLICIT_DEF
    %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc
    %18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec
    %40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec

  bb.2:
    $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc
    $vgpr8 = COPY %40:vgpr_32
    $vgpr9 = COPY %36:vgpr_32
    $sgpr0 = COPY %9:sreg_32
    SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
...