llvm/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s

--- |
  ; IR stub for the machine function defined later in this file. The body is
  ; just `ret void`; the arguments supply the %ir.in0 .. %ir.in29 values that
  ; the MIR memory operands name. %in0-%in11 are addrspace(3) pointers and
  ; %in12-%in29 are addrspace(7) pointers; every argument is marked noalias.
  define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void }

  ; Requests a waves-per-EU range of 1,1 and a flat work-group size range of 1,256.
  attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }

  ; !0 is the node the MIR memory operands attach as !alias.scope.
  ; !1 is defined here but is not referenced by any memory operand in this file.
  !0 = distinct !{!0}
  !1 = !{!1, !0}
...


---
# Machine function fed to the machine-scheduler pass; the name matches the IR
# stub @single-wave-phase-2b in the first document of this file.
name:            single-wave-phase-2b
# Liveness is tracked, so dead/killed flags in the body are meaningful.
tracksRegLiveness: true
machineFunctionInfo:
  # Consistent with the "amdgpu-waves-per-eu"="1,1" attribute on the IR stub.
  occupancy:       1
# The body holds the autogenerated expected output (as ';' comments) followed
# by the input blocks bb.0, bb.1 and bb.2.
body:             |
  ; GCN-LABEL: name: single-wave-phase-2b
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF3:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF4:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF5:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF41:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF42:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF43:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF44:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF45:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF46:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF47:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF48:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT:   [[DEF49:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF8]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF9]], implicit $exec
  ; GCN-NEXT:   dead [[V_MFMA_F32_32X32X8F16_mac_e64_:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF10]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF11]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_6:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF12]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF13]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_7:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF14]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF15]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub0_sub1, [[DS_READ_B128_gfx9_6]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_8:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF16]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF17]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_6]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_9:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF18]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF19]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_7]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_10:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF20]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF21]], implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_7]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DS_READ_B128_gfx9_11:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_8]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   undef [[V_PERM_B32_e64_:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF28]], [[DEF29]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF30]], [[DEF31]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF24]], [[DEF25]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF26]], [[DEF27]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF40]], [[V_PERM_B32_e64_]], 0, 0, implicit $exec :: (store (s128) into %ir.in0, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF31:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in12, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF30:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_1]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in13, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF29:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_2]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in14, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF28:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_3]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in15, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_8]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DEF27:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_4]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in16, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF26:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_5]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in17, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_6]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in18, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_7]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in19, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_9]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   undef [[V_PERM_B32_e64_1:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF36]], [[DEF37]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_1:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF38]], [[DEF39]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_1:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF32]], [[DEF33]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   [[V_PERM_B32_e64_1:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF34]], [[DEF35]], [[DEF44]], implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF40]], [[V_PERM_B32_e64_1]], 1040, 0, implicit $exec :: (store (s128) into %ir.in1, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF32:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_8]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in20, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF33:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_9]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in21, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF34:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_10]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in22, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF35:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_11]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in23, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_9]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   [[DEF36:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_12]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in24, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF37:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_13]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in25, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF22]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF23]], implicit $exec
  ; GCN-NEXT:   [[DEF38:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_14]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in26, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF39:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_15]], [[DEF47]], 0, 0, 0, 0, implicit $exec
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub0_sub1, [[DS_READ_B128_gfx9_10]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF41]], [[DEF2]], 0, 0, implicit $exec :: (store (s128) into %ir.in2, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF45]], implicit $exec
  ; GCN-NEXT:   [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF46]], implicit $exec
  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_16]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_10]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF41]], [[DEF3]], 2064, 0, implicit $exec :: (store (s128) into %ir.in3, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF3:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF45]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF45]], implicit $exec
  ; GCN-NEXT:   [[DEF23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF23]], implicit $exec
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_11]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF41]], [[DEF4]], 2080, 0, implicit $exec :: (store (s128) into %ir.in4, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF4:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF46]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   [[DEF46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF46]], implicit $exec
  ; GCN-NEXT:   [[DEF22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF22]], implicit $exec
  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_11]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF41]], [[DEF5]], 16, 0, implicit $exec :: (store (s128) into %ir.in5, !alias.scope !0, addrspace 3)
  ; GCN-NEXT:   [[DEF5:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_17]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
  ; GCN-NEXT:   IGLP_OPT 1
  ; GCN-NEXT:   [[DEF49:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[DEF49]], -1, implicit-def dead $scc
  ; GCN-NEXT:   S_CMP_LG_U32 [[DEF49]], 0, implicit-def $scc
  ; GCN-NEXT:   [[DEF9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF9]], implicit $exec
  ; GCN-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF8]], implicit $exec
  ; GCN-NEXT:   [[DEF13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF13]], implicit $exec
  ; GCN-NEXT:   [[DEF12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF12]], implicit $exec
  ; GCN-NEXT:   [[DEF11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF11]], implicit $exec
  ; GCN-NEXT:   [[DEF10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF10]], implicit $exec
  ; GCN-NEXT:   [[DEF15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF15]], implicit $exec
  ; GCN-NEXT:   [[DEF14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF14]], implicit $exec
  ; GCN-NEXT:   [[DEF17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF17]], implicit $exec
  ; GCN-NEXT:   [[DEF16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF16]], implicit $exec
  ; GCN-NEXT:   [[DEF19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF19]], implicit $exec
  ; GCN-NEXT:   [[DEF18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF18]], implicit $exec
  ; GCN-NEXT:   [[DEF21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF21]], implicit $exec
  ; GCN-NEXT:   [[DEF20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF20]], implicit $exec
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
  ; Entry block: contains only IMPLICIT_DEFs, giving every virtual register
  ; that bb.1 reads before writing a definition. No terminator; control falls
  ; through to bb.1.

  ; 512-bit accumulators of the two MFMA chains in bb.1.
  %761:areg_512_align2 = IMPLICIT_DEF
  %814:areg_512_align2 = IMPLICIT_DEF
  ; 128-bit values stored by the second group of DS_WRITE_B128 in bb.1 and then
  ; redefined there by BUFFER_LOAD_DWORDX4_OFFEN.
  %1757:av_128_align2 = IMPLICIT_DEF
  %1755:av_128_align2 = IMPLICIT_DEF
  %1754:av_128_align2 = IMPLICIT_DEF
  %1756:av_128_align2 = IMPLICIT_DEF
  ; Address operands of the DS_READ_B128 instructions.
  %37:vgpr_32 = IMPLICIT_DEF
  %38:vgpr_32 = IMPLICIT_DEF
  ; Sixteen per-load offsets; each is added to %41 to form a
  ; BUFFER_LOAD_USHORT_OFFEN address and is bumped by %43 at the end of bb.1.
  %1736:vgpr_32 = IMPLICIT_DEF
  %1737:vgpr_32 = IMPLICIT_DEF
  %1738:vgpr_32 = IMPLICIT_DEF
  %1739:vgpr_32 = IMPLICIT_DEF
  %1740:vgpr_32 = IMPLICIT_DEF
  %1741:vgpr_32 = IMPLICIT_DEF
  %1742:vgpr_32 = IMPLICIT_DEF
  %1743:vgpr_32 = IMPLICIT_DEF
  %1744:vgpr_32 = IMPLICIT_DEF
  %1745:vgpr_32 = IMPLICIT_DEF
  %1746:vgpr_32 = IMPLICIT_DEF
  %1747:vgpr_32 = IMPLICIT_DEF
  %1748:vgpr_32 = IMPLICIT_DEF
  %1749:vgpr_32 = IMPLICIT_DEF
  %1750:vgpr_32 = IMPLICIT_DEF
  %1751:vgpr_32 = IMPLICIT_DEF
  ; Inputs of the V_PERM_B32 instructions that build %1599; redefined in bb.1
  ; by the first eight BUFFER_LOAD_USHORT_OFFEN.
  %1766:vgpr_32 = IMPLICIT_DEF
  %1767:vgpr_32 = IMPLICIT_DEF
  %1768:vgpr_32 = IMPLICIT_DEF
  %1769:vgpr_32 = IMPLICIT_DEF
  %1770:vgpr_32 = IMPLICIT_DEF
  %1771:vgpr_32 = IMPLICIT_DEF
  %1772:vgpr_32 = IMPLICIT_DEF
  %1773:vgpr_32 = IMPLICIT_DEF
  ; Inputs of the V_PERM_B32 instructions that build %1579; redefined in bb.1
  ; by the last eight BUFFER_LOAD_USHORT_OFFEN.
  %1758:vgpr_32 = IMPLICIT_DEF
  %1759:vgpr_32 = IMPLICIT_DEF
  %1760:vgpr_32 = IMPLICIT_DEF
  %1761:vgpr_32 = IMPLICIT_DEF
  %1762:vgpr_32 = IMPLICIT_DEF
  %1763:vgpr_32 = IMPLICIT_DEF
  %1764:vgpr_32 = IMPLICIT_DEF
  %1765:vgpr_32 = IMPLICIT_DEF
  ; %14 and %31 are the address operands of the two DS_WRITE_B128 groups;
  ; %41 is the common first operand of the sixteen address adds.
  %14:vgpr_32 = IMPLICIT_DEF
  %31:vgpr_32 = IMPLICIT_DEF
  %41:vgpr_32 = IMPLICIT_DEF
  ; %43 is the per-iteration increment applied to %1736-%1751;
  ; %535 is the third operand of every V_PERM_B32.
  %43:sreg_32 = IMPLICIT_DEF
  %535:sreg_32 = IMPLICIT_DEF
  ; Address operands of the BUFFER_LOAD_DWORDX4_OFFEN group (advanced by 128
  ; per iteration).
  %1752:vgpr_32 = IMPLICIT_DEF
  %1753:vgpr_32 = IMPLICIT_DEF
  ; 128-bit SGPR operands of the buffer loads: %450 for the USHORT loads,
  ; %518 for the DWORDX4 loads (presumably the buffer resource descriptors).
  %450:sgpr_128 = IMPLICIT_DEF
  %518:sgpr_128 = IMPLICIT_DEF
  ; Loop counter decremented at the bottom of bb.1.
  %1735:sreg_32 = IMPLICIT_DEF

  bb.1:
  ; Loop body (branches back to itself at the bottom). This is the region whose
  ; instruction order the scheduler is expected to rearrange.

  ; Scheduling hint consumed by the scheduler. NOTE(review): strategy id 1 is
  ; presumably the single-wave small-GEMM strategy suggested by the test name;
  ; confirm against the AMDGPU usage documentation.
  IGLP_OPT 1
  ; Eight 128-bit DS reads (addrspace 3): four addressed through %37, four
  ; through %38.
  %683:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
  %688:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
  %693:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
  %698:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
  %703:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
  %708:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
  %713:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
  %718:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
  ; First MFMA chain: eight MFMAs that all read accumulator %761, each using
  ; one 64-bit half of a %37-read value and of a %38-read value.
  ; Note the second MFMA writes %762 instead of %761; %762 is never read in
  ; this function, so the remaining six continue from the first MFMA's %761.
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub0_sub1:av_128_align2, %703.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %762:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub2_sub3:av_128_align2, %703.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub0_sub1:av_128_align2, %708.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub2_sub3:av_128_align2, %708.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub0_sub1:av_128_align2, %713.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub2_sub3:av_128_align2, %713.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub0_sub1:av_128_align2, %718.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub2_sub3:av_128_align2, %718.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  ; Four more 128-bit DS reads through %38, feeding the second MFMA chain.
  %764:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
  %769:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
  %774:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
  %779:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
  ; Second MFMA chain: eight MFMAs accumulating into %814. The first operands
  ; reuse %683/%688/%693/%698 from the first chain; the second operands come
  ; from the four reads just above.
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub0_sub1:av_128_align2, %764.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub2_sub3:av_128_align2, %764.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub0_sub1:av_128_align2, %769.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub2_sub3:av_128_align2, %769.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub0_sub1:av_128_align2, %774.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub2_sub3:av_128_align2, %774.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub0_sub1:av_128_align2, %779.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub2_sub3:av_128_align2, %779.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
  ; Build the 128-bit register %1599 one 32-bit subregister at a time (sub3
  ; down to sub0), each from a V_PERM_B32 of two 32-bit inputs with %535.
  undef %1599.sub3:vreg_128_align2 = V_PERM_B32_e64 %1766:vgpr_32, %1767:vgpr_32, %535:sreg_32, implicit $exec
  %1599.sub2:vreg_128_align2 = V_PERM_B32_e64 %1768:vgpr_32, %1769:vgpr_32, %535:sreg_32, implicit $exec
  %1599.sub1:vreg_128_align2 = V_PERM_B32_e64 %1770:vgpr_32, %1771:vgpr_32, %535:sreg_32, implicit $exec
  %1599.sub0:vreg_128_align2 = V_PERM_B32_e64 %1772:vgpr_32, %1773:vgpr_32, %535:sreg_32, implicit $exec
  ; Same construction for %1579.
  undef %1579.sub3:vreg_128_align2 = V_PERM_B32_e64 %1758:vgpr_32, %1759:vgpr_32, %535:sreg_32, implicit $exec
  %1579.sub2:vreg_128_align2 = V_PERM_B32_e64 %1760:vgpr_32, %1761:vgpr_32, %535:sreg_32, implicit $exec
  %1579.sub1:vreg_128_align2 = V_PERM_B32_e64 %1762:vgpr_32, %1763:vgpr_32, %535:sreg_32, implicit $exec
  %1579.sub0:vreg_128_align2 = V_PERM_B32_e64 %1764:vgpr_32, %1765:vgpr_32, %535:sreg_32, implicit $exec
  ; Store the two assembled values through %14 (addrspace 3).
  DS_WRITE_B128_gfx9 %14:vgpr_32, %1599:vreg_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in0, !alias.scope !0, addrspace 3)
  DS_WRITE_B128_gfx9 %14:vgpr_32, %1579:vreg_128_align2, 1040, 0, implicit $exec :: (store (s128) into %ir.in1, !alias.scope !0, addrspace 3)
  ; Sixteen (address add, 16-bit buffer load) pairs. Each add combines %41 with
  ; one of %1736-%1751; each load redefines one of the V_PERM_B32 inputs read
  ; earlier in this block, so the new values are consumed on the next trip.
  %830:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1736:vgpr_32, implicit $exec
  %1773:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %830:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in12, !alias.scope !0, addrspace 7)
  %833:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1737:vgpr_32, implicit $exec
  %1772:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %833:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in13, !alias.scope !0, addrspace 7)
  %835:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1738:vgpr_32, implicit $exec
  %1771:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %835:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in14, !alias.scope !0, addrspace 7)
  %837:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1739:vgpr_32, implicit $exec
  %1770:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %837:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in15, !alias.scope !0, addrspace 7)
  %839:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1740:vgpr_32, implicit $exec
  %1769:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %839:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in16, !alias.scope !0, addrspace 7)
  %841:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1741:vgpr_32, implicit $exec
  %1768:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %841:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in17, !alias.scope !0, addrspace 7)
  %843:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1742:vgpr_32, implicit $exec
  %1767:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %843:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in18, !alias.scope !0, addrspace 7)
  %845:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1743:vgpr_32, implicit $exec
  %1766:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %845:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in19, !alias.scope !0, addrspace 7)
  %847:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1744:vgpr_32, implicit $exec
  %1758:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %847:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in20, !alias.scope !0, addrspace 7)
  %849:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1745:vgpr_32, implicit $exec
  %1759:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %849:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in21, !alias.scope !0, addrspace 7)
  %851:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1746:vgpr_32, implicit $exec
  %1760:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %851:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in22, !alias.scope !0, addrspace 7)
  %853:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1747:vgpr_32, implicit $exec
  %1761:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %853:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in23, !alias.scope !0, addrspace 7)
  %855:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1748:vgpr_32, implicit $exec
  %1762:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %855:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in24, !alias.scope !0, addrspace 7)
  %857:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1749:vgpr_32, implicit $exec
  %1763:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %857:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in25, !alias.scope !0, addrspace 7)
  %859:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1750:vgpr_32, implicit $exec
  %1764:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %859:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in26, !alias.scope !0, addrspace 7)
  %861:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1751:vgpr_32, implicit $exec
  ; NOTE(review): unlike the fifteen loads above, this one carries no memory
  ; operand. The expected output reproduces it the same way, so adding one
  ; would require regenerating the assertions.
  %1765:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %861:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec
  ; Store the four 128-bit values loaded on the previous trip through %31.
  DS_WRITE_B128_gfx9 %31:vgpr_32, %1757:av_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in2, !alias.scope !0, addrspace 3)
  DS_WRITE_B128_gfx9 %31:vgpr_32, %1755:av_128_align2, 2064, 0, implicit $exec :: (store (s128) into %ir.in3, !alias.scope !0, addrspace 3)
  DS_WRITE_B128_gfx9 %31:vgpr_32, %1754:av_128_align2, 2080, 0, implicit $exec :: (store (s128) into %ir.in4, !alias.scope !0, addrspace 3)
  DS_WRITE_B128_gfx9 %31:vgpr_32, %1756:av_128_align2, 16, 0, implicit $exec :: (store (s128) into %ir.in5, !alias.scope !0, addrspace 3)
  ; Reload those four registers with 128-bit buffer loads at %1752-16, %1752,
  ; %1753 and %1753-16.
  ; NOTE(review): the first of these names %ir.in26, which the s16 load into
  ; %1764 above also names.
  %864:vgpr_32 = V_ADD_U32_e32 -16, %1752:vgpr_32, implicit $exec
  %1757:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %864:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
  %1755:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1752:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
  %1754:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1753:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
  %865:vgpr_32 = V_ADD_U32_e32 -16, %1753:vgpr_32, implicit $exec
  %1756:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %865:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
  ; Advance the two 128-bit load addresses by 128 for the next trip.
  %1753:vgpr_32 = V_ADD_U32_e32 128, %1753:vgpr_32, implicit $exec
  %1752:vgpr_32 = V_ADD_U32_e32 128, %1752:vgpr_32, implicit $exec
  ; Advance each of the sixteen 16-bit load offsets by %43.
  %1751:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1751:vgpr_32, implicit $exec
  %1750:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1750:vgpr_32, implicit $exec
  %1749:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1749:vgpr_32, implicit $exec
  %1748:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1748:vgpr_32, implicit $exec
  %1747:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1747:vgpr_32, implicit $exec
  %1746:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1746:vgpr_32, implicit $exec
  %1745:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1745:vgpr_32, implicit $exec
  %1744:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1744:vgpr_32, implicit $exec
  %1743:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1743:vgpr_32, implicit $exec
  %1742:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1742:vgpr_32, implicit $exec
  %1741:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1741:vgpr_32, implicit $exec
  %1740:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1740:vgpr_32, implicit $exec
  %1739:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1739:vgpr_32, implicit $exec
  %1738:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1738:vgpr_32, implicit $exec
  %1737:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1737:vgpr_32, implicit $exec
  %1736:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1736:vgpr_32, implicit $exec
  ; Loop control: decrement the counter, compare it with 0 and conditionally
  ; branch back to the top of this block; otherwise fall through to bb.2.
  %1735:sreg_32 = nsw S_ADD_I32 %1735:sreg_32, -1, implicit-def dead $scc
  S_CMP_LG_U32 %1735:sreg_32, 0, implicit-def $scc
  S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
  ; Loop exit: nothing computed in bb.1 is used here; the program just ends.
  S_ENDPGM 0
---