llvm/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s

--- |
  ; Placeholder IR definitions, one per MIR function defined further down in
  ; this file (the names must match the MIR 'name:' fields). Only the
  ; machine-combiner pass is run on the MIR bodies, so each IR body is just a
  ; trivial return.
  define float @reassoicate_some_inputs_in_different_block(ptr %a, i1 %c) {
    ret float undef
  }

  define float @reassoicate_candidates_in_different_blocks(ptr %a, i1 %c) {
    ret float undef
  }

  define float @reassoicate_candidates_in_different_blocks_no_sink(ptr %a, i1 %c) {
    ret float undef
  }

  define float @no_reassociate_different_block(ptr %a, i1 %c) {
    ret float undef
  }

  ; Callee of the TCRETURNdi tail calls in bb.2 of every MIR function.
  declare void @use()


...
# The serialized reduction in bb.1 should be reassociated to improve
# parallelism, even though its operands (%0-%3) are loaded in bb.0. The
# expected output pairs the loads into two independent FADDs that feed a final
# FADD.
---
name:            reassoicate_some_inputs_in_different_block
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_some_inputs_in_different_block
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    ; Input is a serial chain: %8 = %3 + (%1 + (%0 + %2)).
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    ; The four loaded values are also used on this path, as tail-call arguments.
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...
# Variation of reassoicate_some_inputs_in_different_block where the candidate
# instructions are split across 2 blocks: the first two FADDs of the chain are
# in bb.0 and the last one is in bb.1. The expected output leaves all three
# FADDs unchanged.
---
name:            reassoicate_candidates_in_different_blocks
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    ; First two links of the reduction chain: %7 = %1 + (%0 + %2).
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    ; Last link of the chain; its %7 operand is defined in bb.0.
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...

# Variation of reassoicate_candidates_in_different_blocks where the second FADD
# of the chain (%7, defined in bb.0) is also used in bb.2, where it is copied
# into $q3. The expected output leaves all three FADDs unchanged.
# NOTE(review): the "no_sink" suffix presumably means %7 cannot be sunk into
# bb.1 because of that extra use -- confirm against the originating change.
---
name:            reassoicate_candidates_in_different_blocks_no_sink
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks_no_sink
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[FADDv4f32_1]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
    ; First two links of the reduction chain: %7 = %1 + (%0 + %2).
    %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    ; Last link of the chain; its %7 operand is defined in bb.0.
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
    %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
    %10:gpr64all = COPY %9.dsub
    %12:fpr64 = COPY %10
    %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
    $s0 = COPY %11
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    ; Unlike the previous test, $q3 receives the partial sum %7 rather than %3.
    $q3 = COPY %7
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...

# Reassociation of the reduction in bb.1 is not profitable, because LDRQui3 has a
# much larger latency than the other loads: its address comes from two
# dependent LDRXui loads in bb.0. The expected output keeps the serial FADD
# chain in bb.1 unchanged.
---
name:            no_reassociate_different_block
alignment:       4
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: no_reassociate_different_block
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $x0, $w1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
  ; CHECK-NEXT:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
  ; CHECK-NEXT:   [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64))
  ; CHECK-NEXT:   [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64))
  ; CHECK-NEXT:   [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4)
  ; CHECK-NEXT:   TBZW [[COPY]], 0, %bb.2
  ; CHECK-NEXT:   B %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
  ; CHECK-NEXT:   [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
  ; CHECK-NEXT:   $s0 = COPY [[FADDPv2i32p]]
  ; CHECK-NEXT:   RET_ReallyLR implicit $s0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   $q0 = COPY [[LDRQui]]
  ; CHECK-NEXT:   $q1 = COPY [[LDRQui2]]
  ; CHECK-NEXT:   $q2 = COPY [[LDRQui1]]
  ; CHECK-NEXT:   $q3 = COPY [[LDRQui3]]
  ; CHECK-NEXT:   TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $x0, $w1

    %5:gpr32 = COPY $w1
    %4:gpr64common = COPY $x0
    %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
    %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
    %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
    ; %3 is loaded through a pointer that itself takes two dependent loads to
    ; compute, so it sits at the end of a three-load dependency chain.
    %6:gpr64common = LDRXui %4, 8 :: (load (s64))
    %7:gpr64common = LDRXui killed %6, 0 :: (load (s64))
    %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4)
    TBZW %5, 0, %bb.2
    B %bb.1

  bb.1:
    ; Serial chain: %10 = %3 + (%1 + (%0 + %2)); %3 is consumed only by the
    ; last FADD.
    %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
    %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr
    %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr
    %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr
    %12:gpr64all = COPY %11.dsub
    %14:fpr64 = COPY %12
    %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr
    $s0 = COPY %13
    RET_ReallyLR implicit $s0

  bb.2:
    $q0 = COPY %0
    $q1 = COPY %2
    $q2 = COPY %1
    $q3 = COPY %3
    TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3

...