# llvm/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/never-uniform.mir

# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
# Loads from the flat address space are not uniform.
---
# Two FLAT_LOAD_DWORD instructions read through the same address register %0
# but carry different memory operands; only the first is expected to be
# reported as divergent.
name:            flatloads
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true

body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: flatloads
    ; %1 (memory operand without an address space) must be reported divergent.
    ; CHECK: DIVERGENT: %1
    ; %2 (memory operand tagged addrspace 1) must not be reported divergent.
    ; CHECK-NOT: DIVERGENT: %2
    %0:vreg_64 = IMPLICIT_DEF
    %1:vgpr_32(s32) = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %2:vgpr_32(s32) = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 1)
    ; %3 reads %1 into an sreg_32; no expectation is attached to %3.
    %3:sreg_32 = V_READFIRSTLANE_B32 %1(s32), implicit $exec
    S_ENDPGM 0
...

# Loads from scratch are not uniform.
---
# A single SCRATCH_LOAD_DWORD whose memory operand is tagged addrspace 5; its
# result is expected to be reported as divergent.
name:            scratchloads
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true

body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: scratchloads
    ; %1 is the scratch load result and must be reported divergent.
    ; CHECK: DIVERGENT: %1
    ; The address operand %0 is a VGPR holding the constant 0.
    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %1:vgpr_32 = SCRATCH_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32), addrspace 5)
    S_ENDPGM 0
...

# Global load
---
# Two global loads with different memory operands: GLOBAL_LOAD_DWORD without
# an address space on its memory operand, and GLOBAL_LOAD_DWORDX2 tagged
# addrspace 1. Only the first is expected to be reported as divergent.
name:            globalloads
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true

body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: globalloads
    ; %2 (memory operand without an address space) must be reported divergent.
    ; CHECK: DIVERGENT: %2
    ; %3 (memory operand tagged addrspace 1) must not be reported divergent.
    ; CHECK-NOT: DIVERGENT: %3
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vgpr_32(s32) = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %3:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
    ; %4 reads %2 into an sreg_32; no expectation is attached to %4.
    %4:sreg_32 = V_READFIRSTLANE_B32 %2(s32), implicit $exec
    S_ENDPGM 0
...

# FIXME: the ADDTID instruction might be incorrectly marked uniform.
---
# Pins the current report for DS_READ_ADDTID_B32: the function is expected to
# be printed with every value uniform.
name:            dsreads
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true

body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: dsreads
    ; The report line directly after the function header must state that all
    ; values are uniform, i.e. %1 below is not flagged as divergent.
    ; CHECK-NEXT: ALL VALUES UNIFORM
    ; %0 is defined but not used by any instruction in this block.
    %0:vreg_64 = IMPLICIT_DEF
    ; $m0 is set because the DS read below lists it as an implicit use.
    $m0 = S_MOV_B32 0
    %1:vgpr_32 = DS_READ_ADDTID_B32 0, 0, implicit $m0, implicit $exec
    S_ENDPGM 0
...

# copy source == $sgpr => uniform, $vgpr => divergent
---
# Three COPY instructions from live-in physical registers: SGPR to SGPR,
# SGPR to VGPR, and VGPR to VGPR. Only the copy from $vgpr0 has an explicit
# divergence expectation.
name:            sgprcopy
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: sgprcopy
    ; %2 is copied from $vgpr0 and must be reported divergent. There is no
    ; directive about %0 or %1, so their status is not verified here.
    ; CHECK: DIVERGENT: %2
    liveins: $sgpr0,$sgpr1,$vgpr0
    %0:sgpr_32 = COPY $sgpr0
    %1:vgpr_32 = COPY $sgpr1
    %2:vgpr_32 = COPY $vgpr0
    S_ENDPGM 0
...

# writelane is not uniform
---
# Exercises lane-access instructions (V_READFIRSTLANE_B32, V_READLANE_B32,
# V_WRITELANE_B32) plus a vector compare that writes an SGPR pair. Only the
# V_WRITELANE_B32 result is expected to be reported as divergent.
# Unlike the preceding functions in this file, this one does not set
# tracksRegLiveness.
name:            writelane
machineFunctionInfo:
  isEntryFunction: true
body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: writelane
    ; %4 is the V_WRITELANE_B32 result and must be reported divergent.
    ; CHECK: DIVERGENT: %4

    ; Note how %5 is the result of a vector compare, but it is reported as
    ; uniform because it is stored in an sreg.
    ; CHECK-NOT: DIVERGENT: %5

    %0:vgpr_32 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    ; Lane reads into virtual SGPRs, then the same opcodes on physical
    ; registers. No expectation is attached to any of these four results.
    %2:sgpr_32 = V_READFIRSTLANE_B32 %0, implicit $exec
    %3:sgpr_32 = V_READLANE_B32 %1, 0, implicit $exec
    $sgpr0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
    $sgpr1 = V_READLANE_B32 $vgpr1, $sgpr0, implicit $exec

    %4:vgpr_32 = V_WRITELANE_B32 0, 0, %0, implicit $exec
    %5:sreg_64 = V_CMP_EQ_U32_e64 %0, %4, implicit $exec
    ; The branch reads $vcc, which no instruction in this function defines.
    S_CBRANCH_VCCZ %bb.1, implicit $vcc

  bb.1:
    ; %16 is defined but not used; no expectation is attached to it.
    %16:vgpr_32 = IMPLICIT_DEF
    S_ENDPGM 0
...
# Directly reading a physical VGPR is not uniform.
---
# Every virtual register here is derived from live-in physical VGPRs, either
# through a COPY or by using the physical register directly as an operand.
# All six results are expected to be reported as divergent.
# This function has no machineFunctionInfo section.
name:            physicalreg
tracksRegLiveness: true
body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: physicalreg
    ; %0 through %4 must be reported divergent, in this order.
    ; CHECK: DIVERGENT: %0
    ; CHECK: DIVERGENT: %1
    ; CHECK: DIVERGENT: %2
    ; CHECK: DIVERGENT: %3
    ; CHECK: DIVERGENT: %4
    ; No other divergent line may appear between the %4 and %5 reports.
    ; CHECK-NOT: DIVERGENT
    ; CHECK: DIVERGENT: %5
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    ; Mixed operands: one virtual register and one physical VGPR.
    %2:vgpr_32 = V_AND_B32_e32 %1, $vgpr3, implicit $exec
    ; Both operands are physical VGPRs.
    %3:vgpr_32 = V_ADD_U32_e32 $vgpr2, $vgpr3, implicit $exec
    ; Same physical operands; this opcode also carries an implicit def of $vcc.
    %4:vgpr_32 = V_SUB_CO_U32_e32 $vgpr2, $vgpr3, implicit $exec, implicit-def $vcc
    %5:vgpr_32 = V_AND_B32_e32 $vgpr4, $vgpr5, implicit $exec
    S_ENDPGM 0
...
# mbcnt instructions are not uniform
---
# A single V_MBCNT_LO_U32_B32_e64 with immediate operands -1 and 0; its result
# is expected to be reported as divergent.
name:            mbcnt_lo
machineFunctionInfo:
  isEntryFunction: true
body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: mbcnt_lo
    ; %0 has only immediate inputs yet must still be reported divergent.
    ; CHECK: DIVERGENT: %0
    %0:vgpr_32 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
    S_ENDPGM 0
...
---
# A single V_MBCNT_HI_U32_B32_e64 with immediate operands -1 and 0; its result
# is expected to be reported as divergent.
name:            mbcnt_hi
machineFunctionInfo:
  isEntryFunction: true
body:             |
  bb.0:
    ; CHECK-LABEL: MachineUniformityInfo for function: mbcnt_hi
    ; %0 has only immediate inputs yet must still be reported divergent.
    ; CHECK: DIVERGENT: %0
    %0:vgpr_32 = V_MBCNT_HI_U32_B32_e64 -1, 0, implicit $exec
    S_ENDPGM 0
...