; llvm/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP


; Element-wise `and` test. For every index 0..99 the loop loads an i8 from %a,
; ands it with 1 and stores the result at the same index of %b.
; %a is only ever loaded from, so it carries `readonly`. %b is the store
; destination and therefore must not be `readonly`, because storing through a
; `readonly` pointer argument is undefined behavior.
; The data-with-evl RUN line expects the operation to become a call to
; llvm.vp.and whose EVL operand comes from llvm.experimental.get.vector.length.
; The tail-folding-style=none RUN line expects the loop to stay scalar.
define void @test_and(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_and(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = and i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_and(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = and i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the current element index, starting at 0. Despite its name, %dec
  ; is %len + 1, i.e. the index used by the next iteration.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = and i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Leave the loop once the next index reaches 100 (100 iterations in total).
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Element-wise `or` test. For every index 0..99 the loop loads an i8 from %a,
; ors it with 1 and stores the result at the same index of %b.
; %a is only ever loaded from, so it carries `readonly`. %b is the store
; destination and therefore must not be `readonly`, because storing through a
; `readonly` pointer argument is undefined behavior.
; The data-with-evl RUN line expects the operation to become a call to
; llvm.vp.or whose EVL operand comes from llvm.experimental.get.vector.length.
; The tail-folding-style=none RUN line expects the loop to stay scalar.
define void @test_or(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_or(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.or.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = or i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_or(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = or i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the current element index, starting at 0. Despite its name, %dec
  ; is %len + 1, i.e. the index used by the next iteration.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = or i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Leave the loop once the next index reaches 100 (100 iterations in total).
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Element-wise `xor` test. For every index 0..99 the loop loads an i8 from %a,
; xors it with 1 and stores the result at the same index of %b.
; %a is only ever loaded from, so it carries `readonly`. %b is the store
; destination and therefore must not be `readonly`, because storing through a
; `readonly` pointer argument is undefined behavior.
; The data-with-evl RUN line expects the operation to become a call to
; llvm.vp.xor whose EVL operand comes from llvm.experimental.get.vector.length.
; The tail-folding-style=none RUN line expects the loop to stay scalar.
define void @test_xor(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_xor(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.xor.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = xor i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_xor(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = xor i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the current element index, starting at 0. Despite its name, %dec
  ; is %len + 1, i.e. the index used by the next iteration.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = xor i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Leave the loop once the next index reaches 100 (100 iterations in total).
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Element-wise `shl` test. For every index 0..99 the loop loads an i8 from %a,
; shifts it left by 1 and stores the result at the same index of %b.
; %a is only ever loaded from, so it carries `readonly`. %b is the store
; destination and therefore must not be `readonly`, because storing through a
; `readonly` pointer argument is undefined behavior.
; The data-with-evl RUN line expects the operation to become a call to
; llvm.vp.shl whose EVL operand comes from llvm.experimental.get.vector.length.
; The tail-folding-style=none RUN line expects the loop to stay scalar.
define void @test_shl(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_shl(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.shl.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = shl i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_shl(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = shl i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the current element index, starting at 0. Despite its name, %dec
  ; is %len + 1, i.e. the index used by the next iteration.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Leave the loop once the next index reaches 100 (100 iterations in total).
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Element-wise `lshr` test. For every index 0..99 the loop loads an i8 from
; %a, logically shifts it right by 1 and stores the result at the same index
; of %b.
; %a is only ever loaded from, so it carries `readonly`. %b is the store
; destination and therefore must not be `readonly`, because storing through a
; `readonly` pointer argument is undefined behavior.
; The data-with-evl RUN line expects the operation to become a call to
; llvm.vp.lshr whose EVL operand comes from llvm.experimental.get.vector.length.
; The tail-folding-style=none RUN line expects the loop to stay scalar.
define void @test_lshr(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_lshr(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.lshr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = lshr i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_lshr(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = lshr i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the current element index, starting at 0. Despite its name, %dec
  ; is %len + 1, i.e. the index used by the next iteration.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = lshr i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Leave the loop once the next index reaches 100 (100 iterations in total).
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Byte-wise loop over 100 elements: loads each i8 from %a, applies `ashr` by 1,
; and stores the result to %b. The data-with-evl run is expected to emit
; @llvm.vp.ashr in the vector body; the tail-folding-style=none run is expected
; to leave the scalar loop unvectorized.
; %b is written by the loop, so it must not carry `readonly`: a write through a
; readonly pointer argument is undefined behavior.
define void @test_ashr(ptr nocapture %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_ashr(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = ashr i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_ashr(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = ashr i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  ; Operation under test.
  %tmp = ashr i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Exit once the incremented index reaches the fixed trip count of 100.
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Byte-wise loop over 100 elements: loads each i8 from %a, applies `add` of 1,
; and stores the result to %b. The data-with-evl run is expected to emit
; @llvm.vp.add in the vector body; the tail-folding-style=none run is expected
; to leave the scalar loop unvectorized.
; %b is written by the loop, so it must not carry `readonly`: a write through a
; readonly pointer argument is undefined behavior.
define void @test_add(ptr nocapture %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_add(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = add i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_add(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = add i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  ; Operation under test.
  %tmp = add i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Exit once the incremented index reaches the fixed trip count of 100.
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Byte-wise loop over 100 elements: loads each i8 from %a, applies `sub` of 1,
; and stores the result to %b. The data-with-evl run is expected to emit
; @llvm.vp.sub in the vector body; the tail-folding-style=none run is expected
; to leave the scalar loop unvectorized.
; %b is written by the loop, so it must not carry `readonly`: a write through a
; readonly pointer argument is undefined behavior.
define void @test_sub(ptr nocapture %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_sub(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = sub i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_sub(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = sub i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  ; Operation under test.
  %tmp = sub i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Exit once the incremented index reaches the fixed trip count of 100.
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Byte-wise loop over 100 elements: loads each i8 from %a, applies `mul` by 3,
; and stores the result to %b. The data-with-evl run is expected to emit
; @llvm.vp.mul in the vector body; the tail-folding-style=none run is expected
; to leave the scalar loop unvectorized.
; %b is written by the loop, so it must not carry `readonly`: a write through a
; readonly pointer argument is undefined behavior.
define void @test_mul(ptr nocapture %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_mul(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = mul i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_mul(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = mul i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  ; Operation under test.
  %tmp = mul i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Exit once the incremented index reaches the fixed trip count of 100.
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Byte-wise loop over 100 elements: loads each i8 from %a, applies `sdiv` by 3,
; and stores the result to %b. The data-with-evl run is expected to emit
; @llvm.vp.sdiv in the vector body; the tail-folding-style=none run is expected
; to leave the scalar loop unvectorized.
; %b is written by the loop, so it must not carry `readonly`: a write through a
; readonly pointer argument is undefined behavior.
define void @test_sdiv(ptr nocapture %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_sdiv(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.sdiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = sdiv i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_sdiv(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = sdiv i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  ; Operation under test.
  %tmp = sdiv i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  ; Exit once the incremented index reaches the fixed trip count of 100.
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; b[i] = a[i] udiv 3 for i in [0, 100), on i8 elements.
; With EVL-based tail folding the divide is expected to be widened to
; @llvm.vp.udiv driven by @llvm.experimental.get.vector.length; with tail
; folding disabled the loop is expected to stay scalar.
; %a is only loaded from and %b is the store destination, so `readonly` is
; placed on %a (a store through a readonly pointer argument is undefined
; behaviour).
define void @test_udiv(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_udiv(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.udiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = udiv i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_udiv(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = udiv i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = udiv i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; b[i] = a[i] srem 3 for i in [0, 100), on i8 elements.
; With EVL-based tail folding the remainder is expected to be widened to
; @llvm.vp.srem driven by @llvm.experimental.get.vector.length; with tail
; folding disabled the loop is expected to stay scalar.
; %a is only loaded from and %b is the store destination, so `readonly` is
; placed on %a (a store through a readonly pointer argument is undefined
; behaviour).
define void @test_srem(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_srem(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.srem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = srem i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_srem(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = srem i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = srem i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; b[i] = a[i] urem 3 for i in [0, 100), on i8 elements.
; With EVL-based tail folding the remainder is expected to be widened to
; @llvm.vp.urem driven by @llvm.experimental.get.vector.length; with tail
; folding disabled the loop is expected to stay scalar.
; %a is only loaded from and %b is the store destination, so `readonly` is
; placed on %a (a store through a readonly pointer argument is undefined
; behaviour).
define void @test_urem(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_urem(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.urem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 3, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = urem i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_urem(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = urem i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = urem i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Floating point tests

; b[i] = a[i] + 3.0 (fadd fast) for i in [0, 100), on float elements.
; With EVL-based tail folding the add is expected to be widened to a `fast`
; @llvm.vp.fadd driven by @llvm.experimental.get.vector.length; with tail
; folding disabled the loop is expected to stay scalar.
; %a is only loaded from and %b is the store destination, so `readonly` is
; placed on %a (a store through a readonly pointer argument is undefined
; behaviour).
define void @test_fadd(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_fadd(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fadd fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fadd(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fadd fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; b[i] = a[i] - 3.0 (fsub fast) for i in [0, 100), on float elements.
; With EVL-based tail folding the subtract is expected to be widened to a
; `fast` @llvm.vp.fsub driven by @llvm.experimental.get.vector.length; with
; tail folding disabled the loop is expected to stay scalar.
; %a is only loaded from and %b is the store destination, so `readonly` is
; placed on %a (a store through a readonly pointer argument is undefined
; behaviour).
define void @test_fsub(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_fsub(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fsub fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fsub(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fsub fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fsub fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fmul(
; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fmul fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fmul(
; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fmul fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fmul fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; fdiv: element-wise loop over 100 floats that loads from %a, divides by 3.0
; with fast-math flags, and stores the result to %b.
; With EVL tail folding the checks expect vp.load / vp.fdiv / vp.store driven by
; llvm.experimental.get.vector.length; with tail folding disabled the checks
; expect the scalar loop to be left as is.
; %a is only loaded from, so it carries readonly; %b is the store destination
; and must not be readonly (writing through a readonly argument is UB).
define void @test_fdiv(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_fdiv(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fdiv fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fdiv(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fdiv fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the induction variable, counting 0..99; the loop exits once %dec reaches 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fdiv fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; frem: element-wise loop over 100 floats that loads from %a, takes the
; remainder by 3.0 with fast-math flags, and stores the result to %b.
; Both configured runs expect the loop to be left scalar: the checks below
; contain only the original loop and no vector body.
; %a is only loaded from, so it carries readonly; %b is the store destination
; and must not be readonly (writing through a readonly argument is UB).
define void @test_frem(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_frem(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = frem fast float [[TMP0]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_frem(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = frem fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the induction variable, counting 0..99; the loop exits once %dec reaches 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = frem fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; fneg: element-wise loop over 100 floats that loads from %a, negates the
; value with fast-math flags, and stores the result to %b.
; With EVL tail folding the checks expect vp.load / vp.fneg / vp.store driven by
; llvm.experimental.get.vector.length; with tail folding disabled the checks
; expect the scalar loop to be left as is.
; %a is only loaded from, so it carries readonly; %b is the store destination
; and must not be readonly (writing through a readonly argument is UB).
define void @test_fneg(ptr nocapture readonly %a, ptr nocapture %b) {
; IF-EVL-LABEL: define void @test_fneg(
; IF-EVL-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fneg fast float [[TMP21]]
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fneg(
; NO-VP-SAME: ptr nocapture readonly [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fneg fast float [[TMP0]]
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len is the induction variable, counting 0..99; the loop exits once %dec reaches 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fneg fast float %0
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}