# llvm/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir

# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN %s

# Basic case: the buffer load takes $sgpr0_sgpr1_sgpr2_sgpr3 as an operand and
# the very next instruction, an S_MOV_B32, overwrites $sgpr0.
# Expected: the gfx1010 run shows S_WAITCNT_DEPCTR 65507 between the two; the
# gfx1100 run (common prefix only) shows the S_MOV_B32 directly after the load.
# GCN-LABEL: name: vmem_write_sgpr
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $sgpr0 = S_MOV_B32 0
...
# A buffer store (the "_exact" variant) carries an implicit $exec operand and
# is immediately followed by an S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# Note: $sgpr4 is defined below but is not referenced by the store, which
# passes an immediate 0 where the buffer-load tests pass $sgpr4.
# GCN-LABEL: name: vmem_write_exec
# GCN:        BUFFER_STORE_DWORD_OFFEN_exact
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_exec
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = IMPLICIT_DEF
    BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr0, renamable $vgpr1, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $exec_lo = S_MOV_B32 -1
...
# Four S_MOV_B32 instructions that only read $sgpr0..$sgpr3 (writing
# $sgpr5..$sgpr8) sit between the buffer load and the S_MOV_B32 that finally
# overwrites $sgpr0.
# Expected: the four copies follow the load with nothing inserted, and the
# gfx1010 run places S_WAITCNT_DEPCTR 65507 only before the last S_MOV_B32.
# bb.0 carries an explicitly empty "successors" list.
# GCN-LABEL: name: vmem_write_sgpr_chain
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_MOV_B32
# GCN-NEXT:   S_MOV_B32
# GCN-NEXT:   S_MOV_B32
# GCN-NEXT:   S_MOV_B32
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_chain
body:             |
  bb.0:
    successors:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $sgpr5 = S_MOV_B32 $sgpr0
    $sgpr6 = S_MOV_B32 $sgpr1
    $sgpr7 = S_MOV_B32 $sgpr2
    $sgpr8 = S_MOV_B32 $sgpr3
    $sgpr0 = S_MOV_B32 0
...
# Same shape as the basic case, but the instruction that overwrites $sgpr0 is
# a scalar load (S_LOAD_DWORD_IMM reading $sgpr0_sgpr1) instead of an S_MOV_B32.
# Expected: S_WAITCNT_DEPCTR 65507 before the scalar load on gfx1010 only.
# GCN-LABEL: name: vmem_smem_write_sgpr
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_LOAD_DWORD_IMM
---
name:            vmem_smem_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
...
# An S_NOP 0 separates the buffer load from the S_MOV_B32 that overwrites
# $sgpr0.
# Expected: the S_NOP does not remove the need for the wait; the gfx1010 run
# still shows S_WAITCNT_DEPCTR 65507 after the S_NOP and before the S_MOV_B32.
# GCN-LABEL: name: vmem_snop_write_sgpr
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_NOP
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_snop_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_NOP 0
    $sgpr0 = S_MOV_B32 0
...
# A vector ALU instruction (V_ADD_F32_e32) separates the buffer load from the
# S_MOV_B32 that overwrites $sgpr0.
# Expected: no S_WAITCNT_DEPCTR on either run; every check below uses the
# common prefix, so load, V_ADD_F32 and S_MOV_B32 must be consecutive.
# GCN-LABEL: name: vmem_valu_write_sgpr
# GCN:      BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: V_ADD_F32
# GCN-NEXT: S_MOV_B32
---
name:            vmem_valu_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
    $sgpr0 = S_MOV_B32 0
...
# An S_WAITCNT with immediate 0 separates the buffer load from the S_MOV_B32
# that overwrites $sgpr0.
# Expected: no S_WAITCNT_DEPCTR on either run; the S_MOV_B32 must directly
# follow the existing S_WAITCNT. Compare with vmem_swait_any_write_sgpr, which
# uses a nonzero immediate.
# GCN-LABEL: name: vmem_swait0_write_sgpr
# GCN:      BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT: S_WAITCNT
# GCN-NEXT: S_MOV_B32
---
name:            vmem_swait0_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_WAITCNT 0
    $sgpr0 = S_MOV_B32 0
...
# An S_WAITCNT with a nonzero immediate (1) separates the buffer load from the
# S_MOV_B32 that overwrites $sgpr0.
# Expected: unlike the S_WAITCNT 0 case, the gfx1010 run still shows
# S_WAITCNT_DEPCTR 65507 between the S_WAITCNT and the S_MOV_B32.
# GCN-LABEL: name: vmem_swait_any_write_sgpr
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_WAITCNT
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_swait_any_write_sgpr
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_WAITCNT 1
    $sgpr0 = S_MOV_B32 0
...
# The buffer load references $exec only through its implicit operand, and the
# next instruction writes the full $exec register with S_MOV_B64.
# Expected: S_WAITCNT_DEPCTR 65507 before the S_MOV_B64 on gfx1010 only.
# GCN-LABEL: name: vmem_write_exec_impread
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B64
---
name:            vmem_write_exec_impread
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $exec = S_MOV_B64 7
...
# Variant of vmem_write_exec_impread in which the buffer load also names
# $exec_lo explicitly, in the operand slot where the other buffer-load tests
# pass $sgpr4. The next instruction writes $exec with S_MOV_B64.
# Expected: S_WAITCNT_DEPCTR 65507 before the S_MOV_B64 on gfx1010 only.
# GCN-LABEL: name: vmem_write_exec_expread
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B64
---
name:            vmem_write_exec_expread
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, implicit $exec
    $exec = S_MOV_B64 7
...
# A DS (LDS) read with an implicit $m0 operand is immediately followed by an
# S_MOV_B32 that writes $m0.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# Note: the $sgpr0_sgpr1_sgpr2_sgpr3 and $sgpr4 definitions below are not
# referenced by any later instruction in this function.
# GCN-LABEL: name: ds_write_m0
# GCN:        DS_READ_B32
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            ds_write_m0
body:             |
  bb.0:
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
    $m0 = S_MOV_B32 7
...
# Cross-block case without a branch: the buffer load is the last instruction
# of bb.0, which lists bb.1 as its only successor, and the S_MOV_B32 that
# overwrites $sgpr0 is the first instruction of bb.1.
# Expected: on gfx1010 the S_WAITCNT_DEPCTR 65507 appears at the top of bb.1,
# directly before the S_MOV_B32.
# GCN-LABEL: name: vmem_write_sgpr_fall_through
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN:        bb.1:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_fall_through
body:             |
  bb.0:
    successors: %bb.1
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec

  bb.1:
    $sgpr0 = S_MOV_B32 0
...
# Cross-block case with an explicit branch: bb.0 ends with the buffer load
# followed by S_BRANCH to bb.1, whose first instruction overwrites $sgpr0.
# Expected: the S_BRANCH directly follows the load, and on gfx1010 the
# S_WAITCNT_DEPCTR 65507 appears at the top of bb.1 before the S_MOV_B32.
# GCN-LABEL: name: vmem_write_sgpr_branch
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_BRANCH
# GCN:        bb.1:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_branch
body:             |
  bb.0:
    successors: %bb.1
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    $sgpr0 = S_MOV_B32 0
...
# bb.0 issues the buffer load and branches straight to bb.2, skipping bb.1.
# bb.1 contains only S_WAITCNT 0 and also lists bb.2 as its successor, so bb.2
# is reachable both with and without that wait.
# Expected: on gfx1010 the S_WAITCNT_DEPCTR 65507 is still placed at the top
# of bb.2 before the S_MOV_B32 that overwrites $sgpr0.
# GCN-LABEL: name: vmem_write_sgpr_branch_around
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_BRANCH
# GCN:        bb.2:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_branch_around
body:             |
  bb.0:
    successors: %bb.2
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    successors: %bb.2
    S_WAITCNT 0

  bb.2:
    $sgpr0 = S_MOV_B32 0
...
# Conditional version of the branch-around case. After the buffer load, bb.0
# either jumps to bb.2 (S_CBRANCH_SCC0) or continues to bb.1. bb.1 executes
# S_WAITCNT 0 and a V_ADD_CO_U32_e64 before branching to bb.2.
# Expected: bb.1 is left as written, and on gfx1010 the S_WAITCNT_DEPCTR 65507
# is still placed at the top of bb.2 before the S_MOV_B32 that overwrites
# $sgpr0.
# GCN-LABEL: name: vmem_write_sgpr_cbranch_around
# GCN:        BUFFER_LOAD_DWORD_OFFEN
# GCN-NEXT:   S_CBRANCH
# GCN-NEXT:   S_BRANCH
# GCN:        bb.1:
# GCN:        S_WAITCNT
# GCN:        V_ADD_CO_U32
# GCN:        bb.2:
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_cbranch_around
body:             |
  bb.0:
    successors: %bb.1, %bb.2
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_CBRANCH_SCC0 %bb.2, implicit $scc
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    S_WAITCNT 0
    $vgpr2, $vcc_lo = V_ADD_CO_U32_e64 $vgpr1, $vgpr1, 0, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $sgpr0 = S_MOV_B32 0
...
# Loop case: the S_MOV_B32 that overwrites $sgpr0 is in bb.0, while the buffer
# load using $sgpr0_sgpr1_sgpr2_sgpr3 is in bb.1, which branches back to bb.0.
# Expected: on gfx1010 the S_WAITCNT_DEPCTR 65507 is placed in bb.0, between
# "$vgpr0 = IMPLICIT_DEF" and the S_MOV_B32.
# Note: bb.1 has no explicit successors line; only its S_BRANCH names bb.0.
# GCN-LABEL: name: vmem_write_sgpr_branch_backedge
# GCN:        $vgpr0 = IMPLICIT_DEF
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_write_sgpr_branch_backedge
body:             |
  bb.0:
    successors: %bb.1
    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
    $sgpr4 = IMPLICIT_DEF
    $vgpr0 = IMPLICIT_DEF
    $sgpr0 = S_MOV_B32 0

  bb.1:
    $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    S_BRANCH %bb.0
...
# A DS (LDS) write with an implicit $exec operand is immediately followed by
# an S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# GCN-LABEL: name: ds_write_exec
# GCN:        DS_WRITE_B32_gfx9
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            ds_write_exec
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = IMPLICIT_DEF
    DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec
    $exec_lo = S_MOV_B32 -1
...
# A scratch load with implicit $exec and $flat_scr operands is immediately
# followed by an S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# Note: the load's $vgpr0 operand is flagged "undef" even though $vgpr0 is
# defined on the preceding line.
# GCN-LABEL: name: vmem_scratch_exec
# GCN:        SCRATCH_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_scratch_exec
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = SCRATCH_LOAD_DWORD undef $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $exec_lo = S_MOV_B32 -1
...
# A flat load with implicit $exec and $flat_scr operands is immediately
# followed by an S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# GCN-LABEL: name: vmem_flat_exec
# GCN:        FLAT_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_flat_exec
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = IMPLICIT_DEF
    $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $exec_lo = S_MOV_B32 -1
...
# A global load with an implicit $exec operand is immediately followed by an
# S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# GCN-LABEL: name: vmem_global_exec
# GCN:        GLOBAL_LOAD_DWORD
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_global_exec
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = IMPLICIT_DEF
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $exec_lo = S_MOV_B32 -1
...
# A returning global atomic add with an implicit $exec operand and an attached
# memory operand is immediately followed by an S_MOV_B32 that writes $exec_lo.
# Expected: S_WAITCNT_DEPCTR 65507 between them on gfx1010 only.
# GCN-LABEL: name: vmem_global_atomic_exec
# GCN:        GLOBAL_ATOMIC_ADD_RTN
# GFX10-NEXT: S_WAITCNT_DEPCTR 65507
# GCN-NEXT:   S_MOV_B32
---
name:            vmem_global_atomic_exec
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $vgpr1 = IMPLICIT_DEF
    $vgpr2 = IMPLICIT_DEF
    $vgpr3 = GLOBAL_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr2, 0, 1, implicit $exec :: (load store syncscope("agent") seq_cst (s32), addrspace 1)
    $exec_lo = S_MOV_B32 -1
...