# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
#
---
# the COPY can be coalesced based on subregister liveness
name: subrange_coalesce_liveout
tracksRegLiveness: true
body: |
; GCN-LABEL: name: subrange_coalesce_liveout
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vgpr_32 = COPY %1.sub0
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
%2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
S_BRANCH %bb.2
bb.2:
%4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
S_ENDPGM 0
...
---
# early-clobber stops the coalescer from coalescing the COPY
name: subrange_coalesce_early_clobber
tracksRegLiveness: true
body: |
; GCN-LABEL: name: subrange_coalesce_early_clobber
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vgpr_32 = COPY %1.sub0
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
S_BRANCH %bb.2
bb.2:
%4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
S_ENDPGM 0
...
---
# non-conflict lane(sub1) was redefined, coalescable
name: subrange_coalesce_unrelated_sub_redefined
tracksRegLiveness: true
body: |
; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vgpr_32 = COPY %1.sub0
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.2
%2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
%1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
S_BRANCH %bb.2
bb.2:
%4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
S_ENDPGM 0
...
---
# Another complex example showing the capability of resolving lane conflict
# based on subranges.
name: subrange_coalesce_complex_pattern
tracksRegLiveness: true
body: |
; GCN-LABEL: name: subrange_coalesce_complex_pattern
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vgpr_32 = COPY %1.sub0
S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
successors: %bb.1, %bb.2
%2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
%1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
S_CBRANCH_EXECZ %bb.1, implicit $exec
S_BRANCH %bb.2
bb.2:
%4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
S_ENDPGM 0
...