# llvm/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir

# RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec  %s -o - | FileCheck %s -check-prefixes=GCN,SICI
# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec  %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec  %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec  %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9

# Embedded LLVM IR module. It declares one empty amdgpu_kernel stub for each
# machine function (div_fmas, s_getreg, ..., dpp) defined by the MIR documents
# that follow; every stub just returns.
--- |
  define amdgpu_kernel void @div_fmas() { ret void }
  define amdgpu_kernel void @s_getreg() { ret void }
  define amdgpu_kernel void @s_setreg() { ret void }
  define amdgpu_kernel void @vmem_gt_8dw_store() { ret void }
  define amdgpu_kernel void @readwrite_lane() { ret void }
  define amdgpu_kernel void @rfe() { ret void }
  define amdgpu_kernel void @s_movrel() { ret void }
  define amdgpu_kernel void @v_interp() { ret void }
  define amdgpu_kernel void @dpp() { ret void }
...
---
# Test div_fmas: each block writes $vcc and then executes V_DIV_FMAS_F32, which
# lists $vcc as an implicit use. Expected output on every run line:
#   bb.0  $vcc written by S_MOV_B64            -> no S_NOP before V_DIV_FMAS
#   bb.1  $vcc written by V_CMP_EQ_I32 (e32)   -> S_NOP 3 inserted
#   bb.2  $vcc written by V_CMP_EQ_I32 (e64)   -> S_NOP 3 inserted
#   bb.3  $vcc written by V_DIV_SCALE_F32      -> S_NOP 3 inserted
# NOTE(review): presumably the distinction is scalar-ALU vs vector-ALU writers
# of $vcc -- confirm against the hazard recognizer.
# GCN-LABEL: name: div_fmas

# GCN-LABEL: bb.0:
# GCN: S_MOV_B64
# GCN-NOT: S_NOP
# GCN: V_DIV_FMAS

# GCN-LABEL: bb.1:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32

# GCN-LABEL: bb.2:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32

# GCN-LABEL: bb.3:
# GCN: V_DIV_SCALE_F32
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
name: div_fmas

body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    implicit $vcc = V_CMP_EQ_I32_e32 $vgpr1, $vgpr2, implicit $exec
    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $vcc = V_CMP_EQ_I32_e64 $vgpr1, $vgpr2, implicit $exec
    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    S_BRANCH %bb.3

  bb.3:
    $vgpr4, $vcc = V_DIV_SCALE_F32_e64 0, $vgpr1, 0, $vgpr1, 0, $vgpr3, 0, 0, implicit $mode, implicit $exec
    $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec
    S_ENDPGM 0

...

...
---
# Test s_getreg: an S_SETREG_B32 / S_SETREG_IMM32_B32 followed by S_GETREG_B32.
# Expected output on every run line:
#   bb.0  S_SETREG_B32 and S_GETREG_B32 both use immediate 1, back to back
#         -> S_NOP 1 inserted
#   bb.1  same, with S_SETREG_IMM32_B32 as the writer -> S_NOP 1 inserted
#   bb.2  one S_MOV_B32 already sits between writer and reader
#         -> only S_NOP 0 is expected
#   bb.3  writer uses immediate 0, reader uses immediate 1
#         -> S_GETREG must directly follow S_SETREG (no S_NOP)
# NOTE(review): the immediate is presumably the hardware-register id, so bb.3
# covers the "different register" case -- confirm.
# GCN-LABEL: name: s_getreg

# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 1
# GCN: S_GETREG

# GCN-LABEL: bb.1:
# GCN: S_SETREG_IMM32
# GCN: S_NOP 1
# GCN: S_GETREG

# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_GETREG

# GCN-LABEL: bb.3:
# GCN: S_SETREG
# GCN-NEXT: S_GETREG

name: s_getreg

body: |
  bb.0:
    S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
    $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode
    S_BRANCH %bb.1

  bb.1:
    S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
    $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode
    S_BRANCH %bb.2

  bb.2:
    S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
    $sgpr1 = S_MOV_B32 0
    $sgpr2 = S_GETREG_B32 1, implicit-def $mode, implicit $mode
    S_BRANCH %bb.3

  bb.3:
    S_SETREG_B32 $sgpr0, 0, implicit-def $mode, implicit $mode
    $sgpr1 = S_GETREG_B32 1, implicit-def $mode, implicit $mode
    S_ENDPGM 0
...

...
---
# Test s_setreg: two consecutive S_SETREG_B32 instructions.
# Expected output:
#   bb.0  both use immediate 1 -> S_NOP 0 under the SICI prefix (tahiti,
#         hawaii), S_NOP 1 under the VI prefix (fiji, gfx900)
#   bb.1  immediates 64 and 128 -> same expectations as bb.0
#   bb.2  immediates 1 and 0 -> second S_SETREG directly follows the first
# NOTE(review): bb.1 expects a nop although 64 != 128; presumably only the low
# bits of the immediate select the register, so both name the same one --
# confirm against the hazard recognizer.
# GCN-LABEL: name: s_setreg

# GCN-LABEL: bb.0:
# GCN: S_SETREG
# SICI: S_NOP 0
# VI: S_NOP 1
# GCN: S_SETREG

# GCN-LABEL: bb.1:
# GCN: S_SETREG
# SICI: S_NOP 0
# VI: S_NOP 1
# GCN: S_SETREG

# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN-NEXT: S_SETREG

name: s_setreg

body: |
  bb.0:
    S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
    S_SETREG_B32 $sgpr1, 1, implicit-def $mode, implicit $mode
    S_BRANCH %bb.1

  bb.1:
    S_SETREG_B32 $sgpr0, 64, implicit-def $mode, implicit $mode
    S_SETREG_B32 $sgpr1, 128, implicit-def $mode, implicit $mode
    S_BRANCH %bb.2

  bb.2:
    S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
    S_SETREG_B32 $sgpr1, 0, implicit-def $mode, implicit $mode
    S_ENDPGM 0
...

...
---
# Test vmem_gt_8dw_store: every memory instruction is followed by a V_MOV_B32
# that overwrites $vgpr3, which is part of the instruction's data operand.
# Expected output:
#   bb.0 (buffer instructions)
#     - BUFFER_STORE_DWORD (single-register data): V_MOV follows directly.
#     - BUFFER_STORE_DWORDX4 whose offset operand is the register $sgpr4:
#       V_MOV follows directly.
#     - DWORDX3 / DWORDX4 / FORMAT_XYZ / FORMAT_XYZW / ATOMIC_CMPSWAP_X2 with
#       an immediate 0 in that position: an S_NOP is expected under the CIVI
#       prefix (hawaii, fiji, gfx900); tahiti expects V_MOV directly.
#   bb.1 (flat instructions)
#     - FLAT_STORE_DWORDX2: V_MOV follows directly.
#     - FLAT_STORE_DWORDX3 / DWORDX4 and both X2 cmpswap atomics: S_NOP under
#       the CIVI prefix; tahiti expects V_MOV directly.
# Every instruction/V_MOV pair in the body has a matching check group, so a
# pair cannot silently lose its coverage.
# GCN-LABEL: name: vmem_gt_8dw_store

# GCN-LABEL: bb.0:
# GCN: BUFFER_STORE_DWORD_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX3_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_ATOMIC_CMPSWAP_X2_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32

# GCN-LABEL: bb.1:
# GCN: FLAT_STORE_DWORDX2
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX3
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX4
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_CMPSWAP_X2
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32

name: vmem_gt_8dw_store

body: |
  bb.0:
    BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    S_ENDPGM 0

...

...
---

# Test readwrite_lane: a V_ADD_CO_U32 defines a scalar register ($sgpr0_sgpr1
# in the e64 form, $vcc in the e32 form) and the next V_READLANE_B32 /
# V_WRITELANE_B32 uses part of that register ($sgpr0 or $vcc_lo) as an operand.
# All four blocks expect an S_NOP 3 between the two on every run line.
#   bb.0  e64 add -> V_READLANE_B32 using $sgpr0
#   bb.1  e64 add -> V_WRITELANE_B32 using $sgpr0
#   bb.2  e32 add -> V_READLANE_B32 using $vcc_lo
#   bb.3  e32 add -> V_WRITELANE_B32 using $vcc_lo (value operand is $m0)
# NOTE(review): the shared register presumably acts as the lane select --
# confirm against the instruction definitions.
# GCN-LABEL: name: readwrite_lane

# GCN-LABEL: bb.0:
# GCN: V_ADD_CO_U32
# GCN: S_NOP 3
# GCN: V_READLANE_B32

# GCN-LABEL: bb.1:
# GCN: V_ADD_CO_U32
# GCN: S_NOP 3
# GCN: V_WRITELANE_B32

# GCN-LABEL: bb.2:
# GCN: V_ADD_CO_U32
# GCN: S_NOP 3
# GCN: V_READLANE_B32

# GCN-LABEL: bb.3:
# GCN: V_ADD_CO_U32
# GCN: S_NOP 3
# GCN: V_WRITELANE_B32

name: readwrite_lane

body: |
  bb.0:
    $vgpr0,$sgpr0_sgpr1 = V_ADD_CO_U32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec
    $sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0
    S_BRANCH %bb.1

  bb.1:
    $vgpr0,$sgpr0_sgpr1 = V_ADD_CO_U32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec
    $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4
    S_BRANCH %bb.2

  bb.2:
    $vgpr0,implicit $vcc = V_ADD_CO_U32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $sgpr4 = V_READLANE_B32 $vgpr4, $vcc_lo
    S_BRANCH %bb.3

  bb.3:
    $m0 = S_MOV_B32 $sgpr4
    $vgpr0,implicit $vcc = V_ADD_CO_U32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
    $vgpr4 = V_WRITELANE_B32 $m0, $vcc_lo, $vgpr4
    S_ENDPGM 0

...

...
---

# Test rfe: S_SETREG_B32 immediately followed by S_RFE_B64.
# Expected output:
#   bb.0  S_SETREG_B32 with immediate 3 -> an S_NOP is expected under the VI
#         prefix (fiji, gfx900); the other targets expect S_RFE_B64 directly
#         after S_SETREG
#   bb.1  S_SETREG_B32 with immediate 0 -> S_RFE_B64 directly follows on all
#         targets
# NOTE(review): immediate 3 presumably names the register whose write is
# hazardous before S_RFE_B64 -- confirm which hardware register that is.
# GCN-LABEL: name: rfe

# GCN-LABEL: bb.0:
# GCN: S_SETREG
# VI: S_NOP
# GCN-NEXT: S_RFE_B64

# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN-NEXT: S_RFE_B64

name: rfe

body: |
  bb.0:
    S_SETREG_B32 $sgpr0, 3, implicit-def $mode, implicit $mode
    S_RFE_B64 $sgpr2_sgpr3
    S_BRANCH %bb.1

  bb.1:
    S_SETREG_B32 $sgpr0, 0, implicit-def $mode, implicit $mode
    S_RFE_B64 $sgpr2_sgpr3
    S_ENDPGM 0

...

...
---

# Test s_movrel: each block writes $m0 with S_MOV_B32 and then executes an
# S_MOVREL* instruction that lists $m0 as an implicit use.
# For all four variants (S_MOVRELS_B32, S_MOVRELS_B64, S_MOVRELD_B32,
# S_MOVRELD_B64) an S_NOP is expected only under the GFX9 prefix (gfx900);
# the other run lines expect the S_MOVREL* on the line right after S_MOV_B32.
# GCN-LABEL: name: s_movrel

# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B32

# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B64

# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B32

# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B64

name: s_movrel

body: |
  bb.0:
    $m0 = S_MOV_B32 0
    $sgpr0 = S_MOVRELS_B32 $sgpr0, implicit $m0
    S_BRANCH %bb.1

  bb.1:
    $m0 = S_MOV_B32 0
    $sgpr0_sgpr1 = S_MOVRELS_B64 $sgpr0_sgpr1, implicit $m0
    S_BRANCH %bb.2

  bb.2:
    $m0 = S_MOV_B32 0
    S_MOVRELD_B32 $sgpr0, $sgpr0, implicit $m0
    S_BRANCH %bb.3

  bb.3:
    $m0 = S_MOV_B32 0
    S_MOVRELD_B64 $sgpr0_sgpr1, $sgpr0_sgpr1, implicit $m0
    S_ENDPGM 0
...

...
---

# Test v_interp: each block writes $m0 with S_MOV_B32 and then executes a
# V_INTERP_* instruction that lists $m0 as an implicit use.
# For all four variants (V_INTERP_P1_F32, V_INTERP_P2_F32,
# V_INTERP_P1_F32_16bank, V_INTERP_MOV_F32) an S_NOP is expected on the line
# right after S_MOV_B32 only under the GFX9 prefix (gfx900); the other run
# lines expect the V_INTERP_* there instead.
# GCN-LABEL: name: v_interp

# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32

# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P2_F32

# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32_16bank

# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_MOV_F32

name: v_interp

body: |
  bb.0:
    $m0 = S_MOV_B32 0
    $vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    $m0 = S_MOV_B32 0
    $vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $mode, implicit $m0, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $m0 = S_MOV_B32 0
    $vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $mode, implicit $m0, implicit $exec
    S_BRANCH %bb.3

  bb.3:
    $m0 = S_MOV_B32 0
    $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec
    S_ENDPGM 0
...

...
---

# Test dpp: V_MOV_B32_dpp placed directly after an instruction that writes one
# of its inputs. Only the VI prefix (fiji, gfx900) checks the block contents;
# the tahiti and hawaii run lines check nothing beyond the function name.
#   bb.0  V_MOV_B32_e32 writes $vgpr0, which the dpp move reads
#         -> S_NOP 1 expected in between
#   bb.1  V_CMPX_EQ_I32_e32 writes $exec, which the dpp move implicitly uses
#         -> S_NOP 4 expected in between
# GCN-LABEL: name: dpp

# VI-LABEL: bb.0:
# VI: V_MOV_B32_e32
# VI-NEXT: S_NOP 1
# VI-NEXT: V_MOV_B32_dpp

# VI-LABEL: bb.1:
# VI: V_CMPX_EQ_I32_e32
# VI-NEXT: S_NOP 4
# VI-NEXT: V_MOV_B32_dpp

name: dpp

body: |
  bb.0:
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
    $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
    S_ENDPGM 0
...