llvm/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
---
name:            add_f16_u32_preserve
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: vgpr_32 }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: vgpr_32 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: vgpr_32 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    ; SDWA-LABEL: name: add_f16_u32_preserve
    ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
    ; SDWA-NEXT: {{  $}}
    ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
    ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
    ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
    ; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
    ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
    ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
    ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
    ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
    ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
    ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %5 = V_AND_B32_e32 65535, %3, implicit $exec
    %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
    %7 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
    %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec

    %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
    %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
    %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
    %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec

    %13 = V_OR_B32_e64 %10, %12, implicit $exec

    FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    $sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return $sgpr30_sgpr31
...
---
name:            sdwa_preserve_keep
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    ; SDWA-LABEL: name: sdwa_preserve_keep
    ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
    ; SDWA-NEXT: {{  $}}
    ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
    ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
    ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 8, [[FLAT_LOAD_DWORD]], implicit $exec
    ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 255
    ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_]], implicit $exec
    ; SDWA-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[FLAT_LOAD_DWORD1]], 0, 5, 2, 4, implicit $exec, implicit [[V_AND_B32_e64_]](tied-def 0)
    ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_MOV_B32_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    ; SDWA-NEXT: S_ENDPGM 0
    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
    %10:sreg_32_xm0 = S_MOV_B32 255
    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
    FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    S_ENDPGM 0

...
---
name:            sdwa_preserve_remove
tracksRegLiveness: true
registers:
  - { id: 0, class: vreg_64 }
  - { id: 1, class: vreg_64 }
  - { id: 2, class: sreg_64 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: vgpr_32 }
  - { id: 5, class: sreg_32_xm0_xexec }
  - { id: 6, class: vgpr_32 }
  - { id: 7, class: vgpr_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: vgpr_32 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    ; SDWA-LABEL: name: sdwa_preserve_remove
    ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
    ; SDWA-NEXT: {{  $}}
    ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
    ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
    ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    ; SDWA-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 8, [[FLAT_LOAD_DWORD]], implicit $exec
    ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 65535
    ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_]], implicit $exec
    ; SDWA-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[FLAT_LOAD_DWORD1]], 0, 5, 2, 4, implicit $exec, implicit [[FLAT_LOAD_DWORD]](tied-def 0)
    ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_MOV_B32_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    ; SDWA-NEXT: S_ENDPGM 0
    %2 = COPY $sgpr30_sgpr31
    %1 = COPY $vgpr2_vgpr3
    %0 = COPY $vgpr0_vgpr1
    %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
    %10:sreg_32_xm0 = S_MOV_B32 65535
    %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
    %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
    FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    S_ENDPGM 0

...
---
name:            add_f16_u32_preserve_different_bb
tracksRegLiveness: true
body:             |
  ; SDWA-LABEL: name: add_f16_u32_preserve_different_bb
  ; SDWA: bb.0:
  ; SDWA-NEXT:   successors: %bb.1(0x80000000)
  ; SDWA-NEXT:   liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
  ; SDWA-NEXT: {{  $}}
  ; SDWA-NEXT:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
  ; SDWA-NEXT:   [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
  ; SDWA-NEXT:   [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; SDWA-NEXT:   [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
  ; SDWA-NEXT:   [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
  ; SDWA-NEXT:   [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
  ; SDWA-NEXT:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
  ; SDWA-NEXT:   [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
  ; SDWA-NEXT:   [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
  ; SDWA-NEXT: {{  $}}
  ; SDWA-NEXT: bb.1:
  ; SDWA-NEXT:   successors: %bb.2(0x80000000)
  ; SDWA-NEXT: {{  $}}
  ; SDWA-NEXT:   [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
  ; SDWA-NEXT: {{  $}}
  ; SDWA-NEXT: bb.2:
  ; SDWA-NEXT:   [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
  ; SDWA-NEXT:   FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
  ; SDWA-NEXT:   $sgpr30_sgpr31 = COPY [[COPY]]
  ; SDWA-NEXT:   S_SETPC_B64_return $sgpr30_sgpr31
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31

    %2:sreg_64 = COPY $sgpr30_sgpr31
    %1:vreg_64 = COPY $vgpr2_vgpr3
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %3:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
    %4:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))

    %5:vgpr_32 = V_AND_B32_e32 65535, %3, implicit $exec
    %6:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
    %7:vgpr_32 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
    %8:vgpr_32 = V_LSHRREV_B32_e32 24, %4, implicit $exec

    %9:vgpr_32 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
    %10:vgpr_32 = V_LSHLREV_B16_e64 8, %9, implicit $exec

  bb.1:
    %11:vgpr_32 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
    %12:vgpr_32 = V_LSHLREV_B32_e64 16, %11, implicit $exec

  bb.2:
    %13:vgpr_32 = V_OR_B32_e64 %10, %12, implicit $exec

    FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
    $sgpr30_sgpr31 = COPY %2
    S_SETPC_B64_return $sgpr30_sgpr31
...

# Should not add kill flag to reused ops in SDWAInst

---
name: multiuse_kill
tracksRegLiveness: true

body:             |
  bb.0:
    ; SDWA-LABEL: name: multiuse_kill
    ; SDWA: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; SDWA-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; SDWA-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; SDWA-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF]], implicit $exec
    ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
    ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_]], [[DEF1]], implicit $exec
    ; SDWA-NEXT: [[V_OR_B32_sdwa:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_]], 0, 6, 0, 4, 6, implicit $exec
    ; SDWA-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF2]], implicit $exec
    ; SDWA-NEXT: [[V_OR_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_1]], 0, 6, 0, 4, 6, implicit $exec
    ; SDWA-NEXT: S_ENDPGM 0
    %0:vgpr_32 = IMPLICIT_DEF
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = V_LSHLREV_B32_e64 16, killed %0, implicit $exec
    %4:sreg_32 = S_MOV_B32 65535
    %5:vgpr_32 = V_AND_B32_e64 killed %4, killed %1, implicit $exec
    %6:vgpr_32 = V_OR_B32_e64 %5, killed %3, implicit $exec
    %7:vgpr_32 = V_LSHLREV_B32_e64 16, killed %2, implicit $exec
    %8:vgpr_32 = V_OR_B32_e64 %5, killed %7, implicit $exec

    S_ENDPGM 0

...