llvm/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir

# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150

---

# GCN-LABEL: name: vop3
# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: vop3
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec

    %5:sreg_32_xm0_xexec = IMPLICIT_DEF
    %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec

    %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec

    ; should not be combined because src2 literal is illegal
    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec

    ; should not be combined on subtargets where src1 imm is illegal
    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
...
---

# GCN-LABEL: name: vop3_sgpr_src1
# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
name: vop3_sgpr_src1
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:sgpr_32 = COPY $sgpr0
    %3:sgpr_32 = COPY $sgpr1
    %4:vgpr_32 = IMPLICIT_DEF

    ; should be combined because src2 allows sgpr
    %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow sgpr for src1
    %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow sgpr for src1
    %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec

    ; should be combined only on subtargets that allow inlinable constants for src1
    %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec

    ; should not be combined when literal constants are used
    %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
    %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
...
---

# Regression test for src_modifiers on base u16 opcode
# GCN-LABEL: name: vop3_u16
# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
name: vop3_u16
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
...

name: vop3p
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    ; GCN-LABEL: name: vop3p
    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
    ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF

    ; this should not be combined because op_sel is not zero
    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec

    ; this should not be combined because op_sel_hi is not all set
    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec

    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec

    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec

    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec

    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
    %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec

...

# GCN-LABEL: name: fmac_e64
# GCN: %5:vgpr_32 = V_FMAC_F32_e64_dpp %3, 2, %0, 2, %1, 2, %2, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
name: fmac_e64
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
...

# when the DPP source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute_shrink
# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
# GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
name: dpp_commute_shrink
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec

    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
    %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
    %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec

    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
    %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec

...

# do not combine, dpp arg used twice
# GCN-LABEL: name: dpp_arg_twice
# GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
# GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
# GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
name: dpp_arg_twice
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec

    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec

...

# when the dpp source isn't a src0 operand the operation should be commuted if possible
# GCN-LABEL: name: dpp_commute_e64
# GCN: %4:vgpr_32  = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
# GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
# GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec
# GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
name: dpp_commute_e64
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1

    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec

    %5:vgpr_32 = IMPLICIT_DEF
    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec

    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
    %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec

    %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
    %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec

    ; this cannot be combined because immediate as src0 isn't commutable
    %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
    %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
...

---

# check for floating point modifiers
# GCN-LABEL: name: add_f32_e64
# GCN:   %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN:   %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN:   %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN:   %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec

name: add_f32_e64
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined as e64
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec

    ; this should be combined and shrunk as all modifiers are default
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined and shrunk as modifiers other than abs|neg are default
    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec

    ; this should be combined as e64
    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
...

# check for e64 modifiers
# GCN-LABEL: name: add_u32_e64
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec

name: add_u32_e64
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; this should be combined  and shrunk as all modifiers are default
    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec

    ; this should be combined as _e64
    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
...

# tests on sequences of dpp consumers
# GCN-LABEL: name: dpp_seq
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# broken sequence:
# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec

name: dpp_seq
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1,  implicit $exec
    %5:vgpr_32 = V_SUB_U32_e32 %1, %3,  implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %8:vgpr_32 = V_ADD_U32_e32 %7, %1,  implicit $exec
    ; this breaks the sequence
    %9:vgpr_32 = V_SUB_U32_e32 5, %7,  implicit $exec
...

# tests on sequences of dpp consumers followed by control flow
# GCN-LABEL: name: dpp_seq_cf
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec

name: dpp_seq_cf
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec

    %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
    %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

  bb.2:
    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
...

# GCN-LABEL: name: old_in_diff_bb
# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec

name: old_in_diff_bb
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
...

# old reg def is in diff BB but bound_ctrl:1 - can combine
# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec

name: old_in_diff_bb_bctrl_zero
tracksRegLiveness: true
body: |
  bb.0:
    successors: %bb.1
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# EXEC mask changed between def and use - cannot combine
# GCN-LABEL: name: exec_changed
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec

name: exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
    %5:sreg_64 = COPY $exec, implicit-def $exec
    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
...

# test if $old definition is correctly tracked through subreg manipulation pseudos

# GCN-LABEL: name: mul_old_subreg
# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name:            mul_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
...

# GCN-LABEL: name: add_old_subreg
# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec

name:            add_old_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# GCN-LABEL: name: add_old_subreg_undef
# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec

name:            add_old_subreg_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vreg_64 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
...

# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# GCN-LABEL: name: dpp_vop1
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_vop1
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# GCN-LABEL: name: dpp_min
# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_min
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
...

# Test an undef old operand
# GCN-LABEL: name: dpp_undef_old
# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_undef_old
tracksRegLiveness: true
body: |
  bb.0:
    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which writes a physreg.
# GCN-LABEL: name: phys_dpp_mov_dst
# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
name: phys_dpp_mov_dst
tracksRegLiveness: true
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_old_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_old_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# Do not combine a dpp mov which reads a physreg.
# GCN-LABEL: name: phys_dpp_mov_src
# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
name: phys_dpp_mov_src
tracksRegLiveness: true
body: |
  bb.0:
    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_both_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %9:vgpr_32 = IMPLICIT_DEF
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_both_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_first_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_first_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_second_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %8:vgpr_32 = IMPLICIT_DEF
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp_reg_sequence_second_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_none_combined
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_none_combined
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_exec_changed
# GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN:   S_BRANCH %bb.1
# GCN: bb.1:
# GCN:   %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_exec_changed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    S_BRANCH %bb.1

  bb.1:
    liveins: $vcc_lo
    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_subreg
# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dpp_reg_sequence_subreg
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
    %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_src2_reject
#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
#GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
name: dpp_reg_sequence_src2_reject
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; use of dpp arg as src2, reject
    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
    ; cannot commute src0 and src2, and %4.sub0 already rejected, reject
    %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
...

# GCN-LABEL: name: dpp_reg_sequence_src2
#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
name: dpp_reg_sequence_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
...

# GCN-LABEL: name: dpp64_add64_impdef
# GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_impdef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vcc_lo
    %0:vreg_64 = IMPLICIT_DEF
    %1:vreg_64 = IMPLICIT_DEF
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...

# GCN-LABEL: name:  dpp64_add64_undef
# GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
name: dpp64_add64_undef
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vcc_lo
    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
...


# GCN-LABEL: name: cndmask_with_src2
# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
# GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec
name: cndmask_with_src2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:sreg_32_xm0_xexec = IMPLICIT_DEF
    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec

    ; src2 is legal for _e64
    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
    %7:sreg_32_xm0_xexec = IMPLICIT_DEF
    %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
...

---

# Make sure flags aren't dropped
# GCN-LABEL: name: flags_add_f32_e64
# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
name: flags_add_f32_e64
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    S_ENDPGM 0, implicit %4

...

# GCN-LABEL: name: dont_combine_more_than_one_operand
# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
name: dont_combine_more_than_one_operand
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
...

# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
# GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
name: dont_combine_more_than_one_operand_dpp_reg_sequence
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
...

# Check op_sel is all 0s when combining
# GCN-LABEL: name: opsel_vop3
# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
# GCN: %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
name:            opsel_vop3
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = IMPLICIT_DEF

    ; Combine for op_sel:[0,0,0]
    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[1,0,0]
    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[0,1,0]
    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[1,1,0]
    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec

    ; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
    %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
...

# Check op_sel is all 0s and op_sel_hi is all 1s when combining
# GCN-LABEL: name: opsel_vop3p
# GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
# GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
# GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
# GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec

name: opsel_vop3p
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2

    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = IMPLICIT_DEF

    ; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
    %6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
    %8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec

    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
    %10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
    %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
...