llvm/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-lower-control-flow -o - %s | FileCheck %s

# Test si-lower-control-flow insertion points when other terminator
# instructions are present besides the control flow pseudo and a
# branch.


# There's another terminator instruction between SI_IF and
# S_BRANCH. The S_CBRANCH_EXECZ should be inserted immediately before
# S_BRANCH.
---
# Input: bb.0 ends with SI_IF (target %bb.1), then an S_MOV_B64_term, then
# S_BRANCH %bb.2.
# Expected: SI_IF is expanded into COPY $exec / S_AND_B64 / S_XOR_B64 followed
# by an S_MOV_B64_term that writes $exec. The pre-existing S_MOV_B64_term stays
# after that write, and S_CBRANCH_EXECZ %bb.1 sits directly before
# S_BRANCH %bb.2.
name: other_terminator_sbranch_after_si_if
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: other_terminator_sbranch_after_si_if
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_MOV_B64_term]]
  bb.0:
    successors: %bb.2, %bb.1
    liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5

    %0:vgpr_32 = COPY killed $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec
    %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0, implicit %4

...

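# Here bb.0 has no S_BRANCH and falls through after the other
# terminator. The S_CBRANCH_EXECZ should be inserted after the other
# terminator, as the last instruction of the block.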
---
# Input: bb.0 ends with SI_IF (target %bb.2) followed by an S_MOV_B64_term and
# no explicit branch, so control falls through into bb.1.
# Expected: after the expanded SI_IF sequence and the pre-existing
# S_MOV_B64_term, S_CBRANCH_EXECZ %bb.2 is the last instruction of bb.0; no
# S_BRANCH is added.
name: other_terminator_fallthrough_after_si_if
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: other_terminator_fallthrough_after_si_if
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_MOV_B64_term]]
  bb.0:
    successors: %bb.2, %bb.1
    liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5

    %0:vgpr_32 = COPY killed $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec
    %3:sreg_64_xexec = SI_IF %2, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec

  bb.1:
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0, implicit %4

...

---
# Same shape as the SI_IF + S_BRANCH case, but with SI_ELSE as the control
# flow pseudo: bb.0 ends with SI_ELSE (target %bb.1), an S_MOV_B64_term, then
# S_BRANCH %bb.2.
# Expected: an S_OR_SAVEEXEC_B64 appears as the first instruction of bb.0 (its
# check line names %2 literally rather than through a FileCheck variable). At
# the SI_ELSE position there is an S_AND_B64 followed by an S_XOR_B64_term
# writing $exec. The pre-existing S_MOV_B64_term follows, and
# S_CBRANCH_EXECZ %bb.1 sits directly before S_BRANCH %bb.2.
name: other_terminator_sbranch_after_si_else
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: other_terminator_sbranch_after_si_else
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_B64_]], implicit-def $scc
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_MOV_B64_term]]
  bb.0:
    successors: %bb.2, %bb.1
    liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5

    %0:vgpr_32 = COPY killed $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec
    %3:sreg_64_xexec = SI_ELSE %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0, implicit %4

...

---
# Same shape as the SI_IF + S_BRANCH case, but with SI_LOOP as the control
# flow pseudo: bb.0 ends with SI_LOOP (target %bb.1), an S_MOV_B64_term, then
# S_BRANCH %bb.2.
# Expected: SI_LOOP becomes an S_ANDN2_B64_term writing $exec. The
# pre-existing S_MOV_B64_term follows, and S_CBRANCH_EXECNZ %bb.1 (EXECNZ,
# not EXECZ) sits directly before S_BRANCH %bb.2.
name: other_terminator_sbranch_after_si_loop
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: other_terminator_sbranch_after_si_loop
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, [[COPY]], implicit $exec
  ; CHECK-NEXT:   $exec = S_ANDN2_B64_term $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[COPY1]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   S_ENDPGM 0, implicit [[S_MOV_B64_term]]
  bb.0:
    successors: %bb.2, %bb.1
    liveins: $vgpr0, $vgpr1, $sgpr4_sgpr5

    %0:vgpr_32 = COPY killed $vgpr0
    %1:sreg_64_xexec = COPY $sgpr4_sgpr5
    %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %0, implicit $exec
    SI_LOOP %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %4:sreg_64_xexec = S_MOV_B64_term killed %1, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_BRANCH %bb.2

  bb.2:
    S_ENDPGM 0, implicit %4

...

# The saved-exec result register of SI_IF is used by other terminators
# that were inserted to behave as a lowered phi. If the lowering ignores
# the output register of SI_IF and removes its def, the S_MOV_B64_term
# uses of that register fail the machine verifier.

---
# Input: in both bb.0 and bb.2 the result of SI_IF (%10 / %9) is read by two
# S_MOV_B64_term instructions that follow it, before S_BRANCH %bb.2. bb.2 also
# starts with an SI_END_CF that is followed by S_SLEEP and the second SI_IF.
# Expected:
#  - Each pair of S_MOV_B64_term instructions reads the S_XOR_B64 result of
#    the expanded SI_IF, and S_CBRANCH_EXECZ %bb.1 is placed after them,
#    directly before S_BRANCH %bb.2.
#  - SI_END_CF becomes an S_OR_B64_term writing $exec that ends bb.2; the
#    instructions after it (S_SLEEP and the second SI_IF sequence) are in a
#    new block bb.3.
name:            si_if_use
alignment:       1
legalized:       true
regBankSelected: true
selected:        true
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: si_if_use
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
  ; CHECK-NEXT:   [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
  ; CHECK-NEXT:   dead [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1)
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
  ; CHECK-NEXT:   $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_SLEEP 1
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
  ; CHECK-NEXT:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY5]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY5]], implicit-def dead $scc
  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
  ; CHECK-NEXT:   [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  bb.0:
    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31

    %0:vgpr_32 = COPY killed $vgpr0
    %1:vgpr_32 = COPY killed $vgpr1
    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
    %10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
    %13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    %11:sreg_64_xexec = COPY %13
    dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1)
    %14:sreg_64_xexec = COPY %11

  bb.2:
    %12:sreg_64_xexec = COPY %14
    SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec
    S_SLEEP 1
    %9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
    %14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
    %13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
    S_BRANCH %bb.2

...