llvm/llvm/test/CodeGen/AMDGPU/waitcnt-global-inv-wb.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s

# Check that we correctly track that GLOBAL_INV increases LOAD_cnt.
# We use a straightforward dependency between a GLOBAL_LOAD and an instruction
# that uses its result - the S_WAIT_LOADCNT introduced before the use should
# reflect the fact that there is a GLOBAL_INV between them.
# FIXME: We could get away with an S_WAIT_LOADCNT 1 here.
---
# Single-block function: a global load into $vgpr0, then a GLOBAL_INV, then a
# V_MOV that reads $vgpr0. The expected output below has a wait inserted
# immediately before that read.
name: waitcnt-global-inv
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $sgpr2_sgpr3

    ; Expected output (autogenerated; regenerate with update_mir_test_checks.py
    ; rather than editing by hand). The only difference from the input below is
    ; the S_WAIT_LOADCNT 0 placed between GLOBAL_INV and the V_MOV.
    ; GFX12-LABEL: name: waitcnt-global-inv
    ; GFX12: liveins: $vgpr0, $vgpr1, $sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
    ; Input: the load defines $vgpr0 and the V_MOV uses it, with a GLOBAL_INV
    ; in between.
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr2_sgpr3, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    GLOBAL_INV 16, implicit $exec
    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
...

# TODO: Add tests for GLOBAL_WB and GLOBAL_WBINV.