llvm/llvm/test/CodeGen/Hexagon/autohvx/vector-align-interleaved.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s

; In this testcase, when loads were moved close to their users, they were
; actually moved right before the consuming stores. This happened after the
; store group had been moved, so the loads and stores ended up interleaved.
; This violated the assumption in store realigning that all loads are
; available before the first store, and caused some code that depends on the
; loads to be inserted before the loads it uses.
; Just make sure that this compiles ok.

; Function Attrs: nounwind
; f0: copies <16 x i32> vectors from %a0 (readonly) to %a1, four vectors per
; loop iteration. If %a2 is zero the function returns immediately; otherwise
; it enters %b2, which branches back to itself unconditionally (this IR has no
; loop exit). Inside %b2 all four loads appear before the first store, with
; the address computations interleaved between them.
define void @f0(ptr noalias nocapture readonly %a0, ptr noalias nocapture %a1, i32 %a2) #0 {
; CHECK-LABEL: f0:
; CHECK:       // %bb.0: // %b0
; CHECK-NEXT:    {
; CHECK-NEXT:     p0 = cmp.eq(r2,#0)
; CHECK-NEXT:     if (p0.new) jumpr:nt r31
; CHECK-NEXT:    }
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB0_1: // %b2
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.cur = vmem(r0+#0)
; CHECK-NEXT:     vmem(r1+#0) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v29.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#1) = v29
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.cur = vmem(r0+#2)
; CHECK-NEXT:     vmem(r1+#2) = v30
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r0 = add(r0,#256)
; CHECK-NEXT:     r1 = add(r1,#256)
; CHECK-NEXT:     v31.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#3) = v31
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jump .LBB0_1
; CHECK-NEXT:    }
b0:
  ; Early exit: go straight to the return block when %a2 == 0.
  %v0 = icmp eq i32 %a2, 0
  br i1 %v0, label %b3, label %b1

b1:                                               ; preds = %b0
  ; Preheader: only forwards control into the loop.
  br label %b2

b2:                                               ; preds = %b2, %b1
  ; %v3 = current destination pointer (starts at %a1, advanced to %v16).
  ; %v4 = current source pointer (starts at %a0, advanced to %v11).
  %v3 = phi ptr [ %v16, %b2 ], [ %a1, %b1 ]
  %v4 = phi ptr [ %v11, %b2 ], [ %a0, %b1 ]
  ; Load group: four consecutive 64-byte-aligned vectors at %v4 + 0..3.
  ; %v11 (%v4 + 4 vectors) is the source pointer for the next iteration.
  %v5 = getelementptr inbounds <16 x i32>, ptr %v4, i32 1
  %v6 = load <16 x i32>, ptr %v4, align 64
  %v7 = getelementptr inbounds <16 x i32>, ptr %v4, i32 2
  %v8 = load <16 x i32>, ptr %v5, align 64
  %v9 = getelementptr inbounds <16 x i32>, ptr %v4, i32 3
  %v10 = load <16 x i32>, ptr %v7, align 64
  %v11 = getelementptr inbounds <16 x i32>, ptr %v4, i32 4
  %v12 = load <16 x i32>, ptr %v9, align 64
  ; Store group: write the loaded vectors unchanged to %v3 + 0..3.
  ; %v16 (%v3 + 4 vectors) is the destination pointer for the next iteration.
  %v13 = getelementptr inbounds <16 x i32>, ptr %v3, i32 1
  store <16 x i32> %v6, ptr %v3, align 64
  %v14 = getelementptr inbounds <16 x i32>, ptr %v3, i32 2
  store <16 x i32> %v8, ptr %v13, align 64
  %v15 = getelementptr inbounds <16 x i32>, ptr %v3, i32 3
  store <16 x i32> %v10, ptr %v14, align 64
  %v16 = getelementptr inbounds <16 x i32>, ptr %v3, i32 4
  store <16 x i32> %v12, ptr %v15, align 64
  ; Unconditional back-edge: the loop never exits in this IR.
  br label %b2

b3:                                               ; preds = %b0
  ; Reached only from %b0, when %a2 == 0.
  ret void
}

; Attribute group #0, referenced by @f0: nounwind plus the Hexagon target
; features +hvxv65 and +hvx-length64b. NOTE(review): hvx-length64b presumably
; selects 64-byte HVX vectors, which is the size of the <16 x i32> values
; used in @f0 -- confirm against the Hexagon target feature list.
attributes #0 = { nounwind "target-features"="+hvxv65,+hvx-length64b" }