# llvm/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s

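# Early tail duplication must not merge bb.6 into bb.5: doing so would place a
# non-terminator (S_SLEEP) after the terminator S_MOV_B32_term. The block
# numbers here are those of the input MIR; in the expected output the same
# blocks are numbered bb.5 and bb.4 respectively.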

---
name:           tail_duplicate_terminator
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: tail_duplicate_terminator
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_SLEEP 1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_MOV_B32_term:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.3, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   S_SLEEP 2
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ;
  ; Input for the pass. The blocks are written here as bb.1 through bb.6; the
  ; expected output above shows the same blocks as bb.0 through bb.5 (each
  ; number one lower) and also prints the successor lists, which are not
  ; spelled out in this input.
  ;
  ; First block: branches unconditionally to bb.3, skipping bb.2.
  bb.1:
    S_BRANCH %bb.3

  ; Target of the branch at the end of bb.6. It has no branch of its own; the
  ; expected output lists the following block as its only successor.
  bb.2:
    S_SLEEP 1

  ; Copies $exec_lo into %0 using the terminator form of the move.
  bb.3:
    %0:sreg_32_xm0_xexec = S_MOV_B32_term $exec_lo

  ; SI_WATERFALL_LOOP names its own block as the target; the expected output
  ; lists both this block and the next one as successors.
  bb.4:
    SI_WATERFALL_LOOP %bb.4, implicit $exec

  ; Copies %0 back into $exec_lo, again with S_MOV_B32_term. This block ends in
  ; that terminator, so the pass must not append the contents of bb.6 to it.
  bb.5:
    $exec_lo = S_MOV_B32_term %0

  ; The block that must remain separate from bb.5: a non-terminator
  ; (S_SLEEP 2) followed by a branch back to bb.2.
  bb.6:
    S_SLEEP 2
    S_BRANCH %bb.2
...