llvm/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -arm-memtransfer-tploop=force-enabled %s -o - | FileCheck %s

; In this test, the successors of various blocks were becoming invalid after
; ifcvt as the blocks did not properly fall through to the successor after a
; WhileLoopStart

@arr_183 = external dso_local local_unnamed_addr global [20 x [23 x [19 x i8]]], align 1
define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %d) {
; CHECK-LABEL: a:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    cmp r0, #2
; CHECK-NEXT:    bls.w .LBB0_12
; CHECK-NEXT:  @ %bb.1: @ %for.body.us.preheader
; CHECK-NEXT:    movw r5, :lower16:arr_183
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    movt r5, :upper16:arr_183
; CHECK-NEXT:    mov.w r12, #19
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    b .LBB0_3
; CHECK-NEXT:  .LBB0_2: @ %land.end.us.3
; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:  .LBB0_3: @ %for.body.us
; CHECK-NEXT:    @ =>This Loop Header: Depth=1
; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_6 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_8 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_11 Depth 2
; CHECK-NEXT:    ldr.w r0, [r2, r3, lsl #2]
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r0, [r1, r3]
; CHECK-NEXT:    moveq r0, #0
; CHECK-NEXT:    mla r3, r3, r12, r5
; CHECK-NEXT:    add r3, r0
; CHECK-NEXT:    rsb.w r0, r0, #108
; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_5
; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q0, [r3], #16
; CHECK-NEXT:    letp lr, .LBB0_4
; CHECK-NEXT:  .LBB0_5: @ %land.end.us
; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    ldr r0, [r2, #4]
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r0, [r1, #1]
; CHECK-NEXT:    moveq r0, #0
; CHECK-NEXT:    adds r3, r5, r0
; CHECK-NEXT:    rsb.w r0, r0, #108
; CHECK-NEXT:    adds r3, #19
; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_7
; CHECK-NEXT:  .LBB0_6: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q1, [r3], #16
; CHECK-NEXT:    letp lr, .LBB0_6
; CHECK-NEXT:  .LBB0_7: @ %land.end.us.1
; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    ldr r0, [r2, #4]
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r0, [r1, #1]
; CHECK-NEXT:    moveq r0, #0
; CHECK-NEXT:    adds r3, r5, r0
; CHECK-NEXT:    rsb.w r0, r0, #108
; CHECK-NEXT:    adds r3, #19
; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_9
; CHECK-NEXT:  .LBB0_8: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q2, [r3], #16
; CHECK-NEXT:    letp lr, .LBB0_8
; CHECK-NEXT:  .LBB0_9: @ %land.end.us.2
; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    ldr r0, [r2, #4]
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r0, [r1, #1]
; CHECK-NEXT:    moveq r0, #0
; CHECK-NEXT:    adds r3, r5, r0
; CHECK-NEXT:    rsb.w r0, r0, #108
; CHECK-NEXT:    add.w r4, r0, #15
; CHECK-NEXT:    adds r3, #19
; CHECK-NEXT:    lsrs r4, r4, #4
; CHECK-NEXT:    cmp.w r4, #0
; CHECK-NEXT:    beq .LBB0_2
; CHECK-NEXT:  @ %bb.10: @ %land.end.us.2
; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT:    dlstp.8 lr, r0
; CHECK-NEXT:  .LBB0_11: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q3, [r3], #16
; CHECK-NEXT:    letp lr, .LBB0_11
; CHECK-NEXT:    b .LBB0_2
; CHECK-NEXT:  .LBB0_12:
; CHECK-NEXT:    movw r12, :lower16:arr_183
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    movt r12, :upper16:arr_183
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    b .LBB0_14
; CHECK-NEXT:  .LBB0_13: @ %for.body.lr.ph.3
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    ldr r3, [r2, #4]
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r3, [r1, #1]
; CHECK-NEXT:    moveq r3, #0
; CHECK-NEXT:    add.w r5, r12, r3
; CHECK-NEXT:    rsb.w r3, r3, #108
; CHECK-NEXT:    add.w r4, r5, #19
; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_14
; CHECK-NEXT:    b .LBB0_24
; CHECK-NEXT:  .LBB0_14: @ %for.cond
; CHECK-NEXT:    @ =>This Loop Header: Depth=1
; CHECK-NEXT:    @ Child Loop BB0_16 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_19 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_22 Depth 2
; CHECK-NEXT:    @ Child Loop BB0_24 Depth 2
; CHECK-NEXT:    cmp r0, #2
; CHECK-NEXT:    blo .LBB0_17
; CHECK-NEXT:  @ %bb.15: @ %for.body.lr.ph
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    ldr r3, [r2, #4]
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r3, [r1, #1]
; CHECK-NEXT:    moveq r3, #0
; CHECK-NEXT:    add.w r5, r12, r3
; CHECK-NEXT:    rsb.w r3, r3, #108
; CHECK-NEXT:    add.w r4, r5, #19
; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_17
; CHECK-NEXT:  .LBB0_16: @ Parent Loop BB0_14 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q0, [r4], #16
; CHECK-NEXT:    letp lr, .LBB0_16
; CHECK-NEXT:  .LBB0_17: @ %for.cond.backedge
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    cmp r0, #2
; CHECK-NEXT:    blo .LBB0_20
; CHECK-NEXT:  @ %bb.18: @ %for.body.lr.ph.1
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    ldr r3, [r2, #4]
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r3, [r1, #1]
; CHECK-NEXT:    moveq r3, #0
; CHECK-NEXT:    add.w r5, r12, r3
; CHECK-NEXT:    rsb.w r3, r3, #108
; CHECK-NEXT:    add.w r4, r5, #19
; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_20
; CHECK-NEXT:  .LBB0_19: @ Parent Loop BB0_14 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q1, [r4], #16
; CHECK-NEXT:    letp lr, .LBB0_19
; CHECK-NEXT:  .LBB0_20: @ %for.cond.backedge.1
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    cmp r0, #2
; CHECK-NEXT:    blo .LBB0_23
; CHECK-NEXT:  @ %bb.21: @ %for.body.lr.ph.2
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    ldr r3, [r2, #4]
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    ite ne
; CHECK-NEXT:    ldrbne r3, [r1, #1]
; CHECK-NEXT:    moveq r3, #0
; CHECK-NEXT:    add.w r5, r12, r3
; CHECK-NEXT:    rsb.w r3, r3, #108
; CHECK-NEXT:    add.w r4, r5, #19
; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_23
; CHECK-NEXT:  .LBB0_22: @ Parent Loop BB0_14 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q2, [r4], #16
; CHECK-NEXT:    letp lr, .LBB0_22
; CHECK-NEXT:  .LBB0_23: @ %for.cond.backedge.2
; CHECK-NEXT:    @ in Loop: Header=BB0_14 Depth=1
; CHECK-NEXT:    cmp r0, #2
; CHECK-NEXT:    blo .LBB0_14
; CHECK-NEXT:    b .LBB0_13
; CHECK-NEXT:  .LBB0_24: @ Parent Loop BB0_14 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrb.8 q3, [r4], #16
; CHECK-NEXT:    letp lr, .LBB0_24
; CHECK-NEXT:    b .LBB0_14
entry:
  %cmp = icmp ugt i8 %b, 2  ; avoid following BB optimizing away through the domination
  br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader

for.cond.preheader:                               ; preds = %entry
  %cmp43 = icmp ugt i8 %b, 1
  %arrayidx6 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  %cmp43.1 = icmp ugt i8 %b, 1
  %arrayidx6.1 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.1 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  %cmp43.2 = icmp ugt i8 %b, 1
  %arrayidx6.2 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.2 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  %cmp43.3 = icmp ugt i8 %b, 1
  %arrayidx6.3 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.3 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  br label %for.cond

for.body.us.preheader:                            ; preds = %entry
  %arrayidx6.us.1 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.us.1 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  %arrayidx6.us.2 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.us.2 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  %arrayidx6.us.3 = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 1
  %arrayidx12.us.3 = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 1
  br label %for.body.us

for.cond:                                         ; preds = %for.cond.backedge.3, %for.cond.preheader
  br i1 %cmp43, label %for.body.lr.ph, label %for.cond.backedge

for.body.lr.ph:                                   ; preds = %for.cond
  %0 = load i32, ptr %arrayidx6, align 4
  %tobool7.not = icmp eq i32 %0, 0
  br i1 %tobool7.not, label %land.end, label %land.rhs

for.body.us:                                      ; preds = %land.end.us.3, %for.body.us.preheader
  %conv44.us = phi i32 [ 0, %for.body.us.preheader ], [ 1, %land.end.us.3 ]
  %arrayidx6.us = getelementptr inbounds [3 x i32], ptr %d, i32 0, i32 %conv44.us
  %1 = load i32, ptr %arrayidx6.us, align 4
  %tobool7.not.us = icmp eq i32 %1, 0
  br i1 %tobool7.not.us, label %land.end.us, label %land.rhs.us

land.rhs.us:                                      ; preds = %for.body.us
  %arrayidx12.us = getelementptr inbounds [3 x i8], ptr %c, i32 0, i32 %conv44.us
  %2 = load i8, ptr %arrayidx12.us, align 1
  %tobool13.us = zext i8 %2 to i32
  br label %land.end.us

land.end.us:                                      ; preds = %land.rhs.us, %for.body.us
  %3 = phi i32 [ 0, %for.body.us ], [ %tobool13.us, %land.rhs.us ]
  %scevgep45 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 %conv44.us, i32 %3
  %4 = sub nuw nsw i32 108, %3
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep45, i8 0, i32 %4, i1 false)
  %5 = load i32, ptr %arrayidx6.us.1, align 4
  %tobool7.not.us.1 = icmp eq i32 %5, 0
  br i1 %tobool7.not.us.1, label %land.end.us.1, label %land.rhs.us.1

land.rhs:                                         ; preds = %for.body.lr.ph
  %6 = load i8, ptr %arrayidx12, align 1
  %tobool13 = zext i8 %6 to i32
  br label %land.end

land.end:                                         ; preds = %land.rhs, %for.body.lr.ph
  %7 = phi i32 [ 0, %for.body.lr.ph ], [ %tobool13, %land.rhs ]
  %scevgep = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %7
  %8 = sub nuw nsw i32 108, %7
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep, i8 0, i32 %8, i1 false)
  br label %for.cond.backedge

for.cond.backedge:                                ; preds = %land.end, %for.cond
  br i1 %cmp43.1, label %for.body.lr.ph.1, label %for.cond.backedge.1

for.body.lr.ph.1:                                 ; preds = %for.cond.backedge
  %9 = load i32, ptr %arrayidx6.1, align 4
  %tobool7.not.1 = icmp eq i32 %9, 0
  br i1 %tobool7.not.1, label %land.end.1, label %land.rhs.1

land.rhs.1:                                       ; preds = %for.body.lr.ph.1
  %10 = load i8, ptr %arrayidx12.1, align 1
  %tobool13.1 = zext i8 %10 to i32
  br label %land.end.1

land.end.1:                                       ; preds = %land.rhs.1, %for.body.lr.ph.1
  %11 = phi i32 [ 0, %for.body.lr.ph.1 ], [ %tobool13.1, %land.rhs.1 ]
  %scevgep.1 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %11
  %12 = sub nuw nsw i32 108, %11
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep.1, i8 0, i32 %12, i1 false)
  br label %for.cond.backedge.1

for.cond.backedge.1:                              ; preds = %land.end.1, %for.cond.backedge
  br i1 %cmp43.2, label %for.body.lr.ph.2, label %for.cond.backedge.2

for.body.lr.ph.2:                                 ; preds = %for.cond.backedge.1
  %13 = load i32, ptr %arrayidx6.2, align 4
  %tobool7.not.2 = icmp eq i32 %13, 0
  br i1 %tobool7.not.2, label %land.end.2, label %land.rhs.2

land.rhs.2:                                       ; preds = %for.body.lr.ph.2
  %14 = load i8, ptr %arrayidx12.2, align 1
  %tobool13.2 = zext i8 %14 to i32
  br label %land.end.2

land.end.2:                                       ; preds = %land.rhs.2, %for.body.lr.ph.2
  %15 = phi i32 [ 0, %for.body.lr.ph.2 ], [ %tobool13.2, %land.rhs.2 ]
  %scevgep.2 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %15
  %16 = sub nuw nsw i32 108, %15
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep.2, i8 0, i32 %16, i1 false)
  br label %for.cond.backedge.2

for.cond.backedge.2:                              ; preds = %land.end.2, %for.cond.backedge.1
  br i1 %cmp43.3, label %for.body.lr.ph.3, label %for.cond.backedge.3

for.body.lr.ph.3:                                 ; preds = %for.cond.backedge.2
  %17 = load i32, ptr %arrayidx6.3, align 4
  %tobool7.not.3 = icmp eq i32 %17, 0
  br i1 %tobool7.not.3, label %land.end.3, label %land.rhs.3

land.rhs.3:                                       ; preds = %for.body.lr.ph.3
  %18 = load i8, ptr %arrayidx12.3, align 1
  %tobool13.3 = zext i8 %18 to i32
  br label %land.end.3

land.end.3:                                       ; preds = %land.rhs.3, %for.body.lr.ph.3
  %19 = phi i32 [ 0, %for.body.lr.ph.3 ], [ %tobool13.3, %land.rhs.3 ]
  %scevgep.3 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %19
  %20 = sub nuw nsw i32 108, %19
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep.3, i8 0, i32 %20, i1 false)
  br label %for.cond.backedge.3

for.cond.backedge.3:                              ; preds = %land.end.3, %for.cond.backedge.2
  br label %for.cond

land.rhs.us.1:                                    ; preds = %land.end.us
  %21 = load i8, ptr %arrayidx12.us.1, align 1
  %tobool13.us.1 = zext i8 %21 to i32
  br label %land.end.us.1

land.end.us.1:                                    ; preds = %land.rhs.us.1, %land.end.us
  %22 = phi i32 [ 0, %land.end.us ], [ %tobool13.us.1, %land.rhs.us.1 ]
  %scevgep45.1 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %22
  %23 = sub nuw nsw i32 108, %22
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.1, i8 0, i32 %23, i1 false)
  %24 = load i32, ptr %arrayidx6.us.2, align 4
  %tobool7.not.us.2 = icmp eq i32 %24, 0
  br i1 %tobool7.not.us.2, label %land.end.us.2, label %land.rhs.us.2

land.rhs.us.2:                                    ; preds = %land.end.us.1
  %25 = load i8, ptr %arrayidx12.us.2, align 1
  %tobool13.us.2 = zext i8 %25 to i32
  br label %land.end.us.2

land.end.us.2:                                    ; preds = %land.rhs.us.2, %land.end.us.1
  %26 = phi i32 [ 0, %land.end.us.1 ], [ %tobool13.us.2, %land.rhs.us.2 ]
  %scevgep45.2 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %26
  %27 = sub nuw nsw i32 108, %26
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.2, i8 0, i32 %27, i1 false)
  %28 = load i32, ptr %arrayidx6.us.3, align 4
  %tobool7.not.us.3 = icmp eq i32 %28, 0
  br i1 %tobool7.not.us.3, label %land.end.us.3, label %land.rhs.us.3

land.rhs.us.3:                                    ; preds = %land.end.us.2
  %29 = load i8, ptr %arrayidx12.us.3, align 1
  %tobool13.us.3 = zext i8 %29 to i32
  br label %land.end.us.3

land.end.us.3:                                    ; preds = %land.rhs.us.3, %land.end.us.2
  %30 = phi i32 [ 0, %land.end.us.2 ], [ %tobool13.us.3, %land.rhs.us.3 ]
  %scevgep45.3 = getelementptr [20 x [23 x [19 x i8]]], ptr @arr_183, i32 0, i32 0, i32 1, i32 %30
  %31 = sub nuw nsw i32 108, %30
  call void @llvm.memset.p0.i32(ptr align 1 %scevgep45.3, i8 0, i32 %31, i1 false)
  br label %for.body.us
}

declare void @llvm.memset.p0.i32(ptr, i8, i32, i1)