llvm/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=COST %s
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -mcpu=skylake-avx512 -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=FORCED %s

define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST:       [[VECTOR_PH]]:
; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
; COST:       [[VECTOR_BODY]]:
; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; COST-NEXT:    [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; COST:       [[MIDDLE_BLOCK]]:
; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST:       [[SCALAR_PH]]:
; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN:.*]]
; COST-NEXT:      i64 13, label %[[IF_THEN]]
; COST-NEXT:    ]
; COST:       [[IF_THEN]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0:[0-9]+]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN:.*]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %loop.latch [
  i64 -12, label %if.then
  i64 13, label %if.then
  ]

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST:       [[VECTOR_PH]]:
; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
; COST:       [[VECTOR_BODY]]:
; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; COST-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT:    [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]]
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; COST:       [[MIDDLE_BLOCK]]:
; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST:       [[SCALAR_PH]]:
; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; COST-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; COST-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; COST:       [[IF_THEN]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[ELSE]]:
; COST-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_default_to_latch_common_dest_using_branches(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]]
; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]]
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], -12
; FORCED-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 13
; FORCED-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN:.*]], label %[[ELSE:.*]]
; FORCED:       [[IF_THEN]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[ELSE]]:
; FORCED-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN]], label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  %eq.1 = icmp eq i64 %l, -12
  %eq.2 = icmp eq i64 %l, 13
  br i1 %eq.1, label %if.then, label %else

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

else:
  br i1 %eq.2, label %if.then, label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

; TODO: Instead of using masked stores, the store can be sunk, executed
; unconditionally and fed by selects.
define void @switch_all_dests_distinct(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_all_dests_distinct(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT:      i64 0, label %[[IF_THEN_3:.*]]
; COST-NEXT:    ]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_3]]:
; COST-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT]]:
; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_all_dests_distinct(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP13]]
; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
; FORCED-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT:      i64 0, label %[[IF_THEN_3:.*]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_3]]:
; FORCED-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT]]:
; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 13, label %if.then.2
  i64 0, label %if.then.3
  ]

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.3:
  store i64 1, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST:       [[VECTOR_PH]]:
; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
; COST:       [[VECTOR_BODY]]:
; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; COST-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; COST-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
; COST-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; COST-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]])
; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]])
; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; COST:       [[MIDDLE_BLOCK]]:
; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST:       [[SCALAR_PH]]:
; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
; COST-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
; COST-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
; COST-NEXT:    br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
; COST:       [[ELSE_1]]:
; COST-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
; COST:       [[ELSE_2]]:
; COST-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_3]]:
; COST-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT:.*:]]
; COST-NEXT:    store i64 2, ptr poison, align 1
; COST-NEXT:    unreachable
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_all_dests_distinct_variant_using_branches(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP16:%.*]] = xor <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP17:%.*]] = xor <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP18:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP19:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]])
; FORCED-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    [[EQ_0:%.*]] = icmp eq i64 [[L]], -12
; FORCED-NEXT:    [[EQ_1:%.*]] = icmp eq i64 [[L]], 13
; FORCED-NEXT:    [[EQ_2:%.*]] = icmp eq i64 [[L]], 0
; FORCED-NEXT:    br i1 [[EQ_0]], label %[[IF_THEN_1:.*]], label %[[ELSE_1:.*]]
; FORCED:       [[ELSE_1]]:
; FORCED-NEXT:    br i1 [[EQ_1]], label %[[IF_THEN_2:.*]], label %[[ELSE_2:.*]]
; FORCED:       [[ELSE_2]]:
; FORCED-NEXT:    br i1 [[EQ_2]], label %[[IF_THEN_3:.*]], label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_3]]:
; FORCED-NEXT:    store i64 1, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT:.*:]]
; FORCED-NEXT:    store i64 2, ptr poison, align 1
; FORCED-NEXT:    unreachable
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  %eq.0 = icmp eq i64 %l, -12
  %eq.1 = icmp eq i64 %l, 13
  %eq.2 = icmp eq i64 %l, 0
  br i1 %eq.0, label %if.then.1, label %else.1

else.1:
  br i1 %eq.1, label %if.then.2, label %else.2

else.2:
  br i1 %eq.2, label %if.then.3, label %loop.latch

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.3:
  store i64 1, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}



define void @switch_multiple_common_dests(ptr %start, ptr %end) {
; COST-LABEL: define void @switch_multiple_common_dests(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT:      i64 0, label %[[IF_THEN_1]]
; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT:      i64 14, label %[[IF_THEN_2]]
; COST-NEXT:      i64 15, label %[[IF_THEN_2]]
; COST-NEXT:    ]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT]]:
; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_multiple_common_dests(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 14, i64 14, i64 14, i64 14>
; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 14, i64 14, i64 14, i64 14>
; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 15, i64 15, i64 15, i64 15>
; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 15, i64 15, i64 15, i64 15>
; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]]
; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]]
; FORCED-NEXT:    [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]]
; FORCED-NEXT:    [[TMP22:%.*]] = or <4 x i1> [[TMP14]], [[TMP18]]
; FORCED-NEXT:    [[TMP35:%.*]] = or <4 x i1> [[TMP21]], [[TMP15]]
; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP22]], [[TMP16]]
; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP27]], [[TMP35]]
; FORCED-NEXT:    [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]]
; FORCED-NEXT:    [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT:      i64 0, label %[[IF_THEN_1]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT:      i64 14, label %[[IF_THEN_2]]
; FORCED-NEXT:      i64 15, label %[[IF_THEN_2]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT]]:
; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 0, label %if.then.1
  i64 13, label %if.then.2
  i64 14, label %if.then.2
  i64 15, label %if.then.2
  ]

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; COST-LABEL: define void @switch4_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT:      i64 0, label %[[DEFAULT]]
; COST-NEXT:    ]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT]]:
; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch4_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]]
; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]]
; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT]]:
; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 13, label %if.then.2
  i64 0, label %default
  ]

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
; COST-LABEL: define void @switch_under_br_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; COST-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; COST-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; COST-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; COST-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; COST-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; COST-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; COST-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COST:       [[VECTOR_PH]]:
; COST-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; COST-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; COST-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; COST-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; COST-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; COST-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; COST-NEXT:    br label %[[VECTOR_BODY:.*]]
; COST:       [[VECTOR_BODY]]:
; COST-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COST-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; COST-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
; COST-NEXT:    [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; COST-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; COST-NEXT:    [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
; COST-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COST-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COST-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; COST:       [[MIDDLE_BLOCK]]:
; COST-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; COST-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COST:       [[SCALAR_PH]]:
; COST-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; COST-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; COST:       [[THEN]]:
; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT:      i64 0, label %[[DEFAULT]]
; COST-NEXT:    ]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT]]:
; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @switch_under_br_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FORCED-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP26:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP15]], [[TMP25]]
; FORCED-NEXT:    [[TMP20:%.*]] = or <4 x i1> [[TMP16]], [[TMP26]]
; FORCED-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP27]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP22:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP25]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; FORCED-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; FORCED:       [[THEN]]:
; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT]]:
; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  %c = icmp ule i64 %l, %x
  br i1 %c, label %then, label %loop.latch

then:
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 13, label %if.then.2
  i64 0, label %default
  ]

if.then.1:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end, i64 %x) {
; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; COST-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; COST-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; COST-NEXT:      i64 0, label %[[DEFAULT]]
; COST-NEXT:    ]
; COST:       [[IF_THEN_1]]:
; COST-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; COST-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; COST:       [[THEN]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[DEFAULT]]
; COST:       [[IF_THEN_2]]:
; COST-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[DEFAULT]]:
; COST-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @br_under_switch_default_common_dest_with_case(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
; FORCED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
; FORCED-NEXT:    [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]]
; FORCED-NEXT:    [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]]
; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP16:%.*]] = xor <4 x i1> [[TMP14]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; FORCED-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
; FORCED-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
; FORCED-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP28:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP20]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP25]]
; FORCED-NEXT:    [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP26]]
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP29]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]])
; FORCED-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
; FORCED-NEXT:    [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[DEFAULT:.*]] [
; FORCED-NEXT:      i64 -12, label %[[IF_THEN_1:.*]]
; FORCED-NEXT:      i64 13, label %[[IF_THEN_2:.*]]
; FORCED-NEXT:      i64 0, label %[[DEFAULT]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN_1]]:
; FORCED-NEXT:    [[C:%.*]] = icmp ule i64 [[L]], [[X]]
; FORCED-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[IF_THEN_2]]
; FORCED:       [[THEN]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[DEFAULT]]
; FORCED:       [[IF_THEN_2]]:
; FORCED-NEXT:    store i64 0, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[DEFAULT]]:
; FORCED-NEXT:    store i64 2, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %default [
  i64 -12, label %if.then.1
  i64 13, label %if.then.2
  i64 0, label %default
  ]

if.then.1:
  %c = icmp ule i64 %l, %x
  br i1 %c, label %then, label %if.then.2

then:
  store i64 42, ptr %ptr.iv, align 1
  br label %default

if.then.2:
  store i64 0, ptr %ptr.iv, align 1
  br label %loop.latch

default:
  store i64 2, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

define void @large_number_of_cases(ptr %start, ptr %end) {
; COST-LABEL: define void @large_number_of_cases(
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; COST-NEXT:  [[ENTRY:.*]]:
; COST-NEXT:    br label %[[LOOP_HEADER:.*]]
; COST:       [[LOOP_HEADER]]:
; COST-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; COST-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; COST-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
; COST-NEXT:      i64 1, label %[[IF_THEN:.*]]
; COST-NEXT:      i64 3, label %[[IF_THEN]]
; COST-NEXT:      i64 11, label %[[IF_THEN]]
; COST-NEXT:      i64 99, label %[[IF_THEN]]
; COST-NEXT:      i64 213, label %[[IF_THEN]]
; COST-NEXT:      i64 238, label %[[IF_THEN]]
; COST-NEXT:      i64 513, label %[[IF_THEN]]
; COST-NEXT:      i64 791, label %[[IF_THEN]]
; COST-NEXT:      i64 899, label %[[IF_THEN]]
; COST-NEXT:    ]
; COST:       [[IF_THEN]]:
; COST-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; COST-NEXT:    br label %[[LOOP_LATCH]]
; COST:       [[LOOP_LATCH]]:
; COST-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; COST-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; COST-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; COST:       [[EXIT]]:
; COST-NEXT:    ret void
;
; FORCED-LABEL: define void @large_number_of_cases(
; FORCED-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
; FORCED-NEXT:  [[ENTRY:.*]]:
; FORCED-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; FORCED-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; FORCED-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
; FORCED-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
; FORCED-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
; FORCED-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; FORCED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
; FORCED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; FORCED:       [[VECTOR_PH]]:
; FORCED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
; FORCED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; FORCED-NEXT:    [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; FORCED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
; FORCED-NEXT:    br label %[[VECTOR_BODY:.*]]
; FORCED:       [[VECTOR_BODY]]:
; FORCED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
; FORCED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
; FORCED-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 1, i64 1, i64 1, i64 1>
; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 1, i64 1, i64 1, i64 1>
; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 3, i64 3, i64 3, i64 3>
; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 11, i64 11, i64 11, i64 11>
; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 11, i64 11, i64 11, i64 11>
; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 99, i64 99, i64 99, i64 99>
; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 99, i64 99, i64 99, i64 99>
; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 213, i64 213, i64 213, i64 213>
; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 213, i64 213, i64 213, i64 213>
; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 238, i64 238, i64 238, i64 238>
; FORCED-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 238, i64 238, i64 238, i64 238>
; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 513, i64 513, i64 513, i64 513>
; FORCED-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 513, i64 513, i64 513, i64 513>
; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 791, i64 791, i64 791, i64 791>
; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 791, i64 791, i64 791, i64 791>
; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 899, i64 899, i64 899, i64 899>
; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 899, i64 899, i64 899, i64 899>
; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
; FORCED-NEXT:    [[TMP30:%.*]] = or <4 x i1> [[TMP28]], [[TMP14]]
; FORCED-NEXT:    [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP15]]
; FORCED-NEXT:    [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP16]]
; FORCED-NEXT:    [[TMP33:%.*]] = or <4 x i1> [[TMP31]], [[TMP17]]
; FORCED-NEXT:    [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP18]]
; FORCED-NEXT:    [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP19]]
; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP20]]
; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP21]]
; FORCED-NEXT:    [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
; FORCED-NEXT:    [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
; FORCED-NEXT:    [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
; FORCED-NEXT:    [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
; FORCED-NEXT:    [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]])
; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; FORCED-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; FORCED:       [[MIDDLE_BLOCK]]:
; FORCED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; FORCED-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; FORCED:       [[SCALAR_PH]]:
; FORCED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
; FORCED-NEXT:    br label %[[LOOP_HEADER:.*]]
; FORCED:       [[LOOP_HEADER]]:
; FORCED-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; FORCED-NEXT:    [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    switch i64 [[L]], label %[[LOOP_LATCH]] [
; FORCED-NEXT:      i64 1, label %[[IF_THEN:.*]]
; FORCED-NEXT:      i64 3, label %[[IF_THEN]]
; FORCED-NEXT:      i64 11, label %[[IF_THEN]]
; FORCED-NEXT:      i64 99, label %[[IF_THEN]]
; FORCED-NEXT:      i64 213, label %[[IF_THEN]]
; FORCED-NEXT:      i64 238, label %[[IF_THEN]]
; FORCED-NEXT:      i64 513, label %[[IF_THEN]]
; FORCED-NEXT:      i64 791, label %[[IF_THEN]]
; FORCED-NEXT:      i64 899, label %[[IF_THEN]]
; FORCED-NEXT:    ]
; FORCED:       [[IF_THEN]]:
; FORCED-NEXT:    store i64 42, ptr [[PTR_IV]], align 1
; FORCED-NEXT:    br label %[[LOOP_LATCH]]
; FORCED:       [[LOOP_LATCH]]:
; FORCED-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
; FORCED-NEXT:    [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; FORCED-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; FORCED:       [[EXIT]]:
; FORCED-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
  %l = load i64, ptr %ptr.iv, align 1
  switch i64 %l, label %loop.latch [
  i64 1, label %if.then
  i64 3, label %if.then
  i64 11, label %if.then
  i64 99, label %if.then
  i64 213, label %if.then
  i64 238, label %if.then
  i64 513, label %if.then
  i64 791, label %if.then
  i64 899, label %if.then
  ]

if.then:
  store i64 42, ptr %ptr.iv, align 1
  br label %loop.latch

loop.latch:
  %ptr.iv.next = getelementptr inbounds i64, ptr %ptr.iv, i64 1
  %ec = icmp eq ptr %ptr.iv.next, %end
  br i1 %ec, label %exit, label %loop.header

exit:
  ret void
}

;.
; COST: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; COST: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; COST: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; COST: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; COST: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; COST: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; COST: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.
; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; FORCED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; FORCED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; FORCED: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; FORCED: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; FORCED: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; FORCED: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; FORCED: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; FORCED: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; FORCED: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; FORCED: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; FORCED: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; FORCED: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
; FORCED: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; FORCED: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
; FORCED: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
; FORCED: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
; FORCED: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; FORCED: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
;.