; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
;; Basic case: check that masked.load/masked.store are generated for i16/i32/i64.
;; The three loads and three stores of %if.true are expected to end up in the
;; entry block, all sharing a single <1 x i1> mask bitcast from %cond.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @basic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.false:
br label %if.end
;; Conditionally executed block: one load and one store per tested width.
if.true:
%0 = load i16, ptr %p, align 2
%1 = load i32, ptr %q, align 4
%2 = load i64, ptr %b, align 8
store i16 %0, ptr %b, align 2
store i32 %1, ptr %p, align 4
store i64 %2, ptr %q, align 8
br label %if.false
if.end:
ret void
}
;; Successor 1 branches to successor 0.
;; The memory operations live on the false edge of the branch, so the expected
;; output inverts the condition (xor with true) before building the mask.
define void @succ1to0(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @succ1to0(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: ret void
;
entry:
%tobool = icmp ne i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
if.end:
ret void
;; Reached only when %tobool is false. The load and store carry no explicit
;; alignment here; the expected intrinsics use an alignment operand of 4.
if.then:
%0 = load i32, ptr %q
store i32 %0, ptr %p
br label %if.end
}
;; Successor 1 branches to successor 0 and there is a phi node.
;; The phi's value for the non-loading edge (0) is expected to become the
;; passthrough operand of the masked load, so the phi disappears and the
;; loaded value is returned directly.
define i32 @succ1to0_phi(ptr %p) {
; CHECK-LABEL: @succ1to0_phi(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
entry:
%cond = icmp eq ptr %p, null
br i1 %cond, label %if.true, label %if.false
;; Only reached when %p is non-null.
if.false:
%0 = load i32, ptr %p
br label %if.true
if.true:
%res = phi i32 [ %0, %if.false ], [ 0, %entry ]
ret i32 %res
}
;; Successor 0 branches to successor 1.
;; The load/store pair on the true edge becomes masked; the store in %if.false
;; is executed on both paths and is expected to stay an ordinary store.
define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @succ0to1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%cond = icmp eq i32 %a, 0
br i1 %cond, label %if.true, label %if.false
;; Common tail: reached from both %entry and %if.true.
if.false:
store i32 1, ptr %q
br label %if.end
;; Conditionally executed block whose load/store are to be hoisted.
if.true:
%0 = load i32, ptr %b
store i32 %0, ptr %p
br label %if.false
if.end:
ret void
}
;; Load after store can be hoisted.
;; The expected output keeps the original order (masked store, then masked
;; load), speculates the zext, and merges the two return values with a select.
define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
; CHECK-LABEL: @load_after_store(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
; CHECK-NEXT: ret i64 [[SPEC_SELECT]]
;
entry:
%cond = icmp eq i32 %a, 0
br i1 %cond, label %if.true, label %if.end
;; The store precedes the load in this block; both successors return.
if.true:
store i32 1, ptr %b
%0 = load i16, ptr %p
%zext = zext i16 %0 to i64
ret i64 %zext
if.end:
ret i64 0
}
;; Speculatable memory read doesn't prevent the hoist.
;; The call to @read_memory_only (declared readonly/speculatable in this file)
;; is expected to be executed unconditionally in the entry block, and only the
;; store of its result is turned into a masked store.
define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
; CHECK-LABEL: @load_skip_speculatable_memory_read(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%cond = icmp eq i32 %a, 0
br i1 %cond, label %if.true, label %if.false
;; Common tail: its store runs on both paths and stays unconditional.
if.false:
store i32 1, ptr %q
br label %if.end
if.true:
%read = call i32 @read_memory_only()
store i32 %read, ptr %p
br label %if.false
if.end:
ret void
}
;; Source of the load can be a GEP.
;; The address computation is expected to be hoisted as a plain GEP, followed
;; by a masked load whose passthrough is the phi's other incoming value (0).
define i32 @load_from_gep(ptr %p) {
; CHECK-LABEL: @load_from_gep(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
entry:
%cond = icmp eq ptr %p, null
br i1 %cond, label %if.true, label %if.false
;; Only reached when %p is non-null: load from %p + 16 bytes.
if.false:
%arrayidx = getelementptr inbounds i8, ptr %p, i64 16
%0 = load i32, ptr %arrayidx
br label %if.true
if.true:
%res = phi i32 [ %0, %if.false ], [ 0, %entry ]
ret i32 %res
}
;; Metadata range/annotation are kept.
;; In the expected output the !range on the i16 load shows up as a range()
;; return attribute on the masked.load call, and !annotation stays attached to
;; the masked load and the masked store that replace the annotated instructions.
define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
; CHECK-LABEL: @nondebug_metadata(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call range(i16 0, 10) <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META5:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META5]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.false:
ret void
if.true:
%0 = load i16, ptr %p, align 2, !range !{i16 0, i16 10}
%1 = load i32, ptr %q, align 4, !annotation !11
;; Note the stores use alignments that differ from the stored type's size
;; (i16 with align 4, i32 with align 2); the expected masked stores carry the
;; same alignment values as these instructions.
store i16 %0, ptr %q, align 4, !annotation !11
store i32 %1, ptr %p, align 2
br label %if.false
}
;; A store tagged with !DIAssignID (and linked to an llvm.dbg.assign) is still
;; converted. The expected masked store is listed without a !DIAssignID
;; attachment, and the phi is replaced by a select on %cond.
define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
; CHECK-LABEL: @debug_metadata_diassign(
; CHECK-NEXT: bb0:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 7>, ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
; CHECK-NEXT: ret i16 [[SPEC_SELECT]]
;
bb0:
br i1 %cond, label %if.true, label %if.false
if.true:
store i16 7, ptr %p, align 4, !DIAssignID !9
br label %if.false
if.false:
%ret = phi i16 [ 2, %bb0 ], [ 3, %if.true ]
;; The dbg.assign refers to the same DIAssignID (!9) as the store above.
call void @llvm.dbg.assign(metadata i16 %ret, metadata !8, metadata !DIExpression(), metadata !9, metadata ptr %p, metadata !DIExpression()), !dbg !7
ret i16 %ret
}
;; Must not crash when combined with the optimization controlled by
;; simplifycfg-hoist-cond-stores.
;; In the expected output the i32 store becomes a masked store, while the
;; trailing i1 store is speculated as a select feeding an unconditional store.
define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
; CHECK-LABEL: @hoist_cond_stores(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i1 false, ptr [[P:%.*]], align 2
; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT: store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
; CHECK-NEXT: ret i32 0
;
entry:
;; Unconditional store to %p ahead of the branch; the conditional block below
;; stores to the same pointer again.
store i1 false, ptr %p, align 2
br i1 %cond, label %if.true, label %if.false
if.true: ; preds = %entry
store i32 0, ptr %p, align 8
store i1 false, ptr %p, align 2
br label %if.false
if.false: ; preds = %if.true, %entry
ret i32 0
}
;; Both successor 0 and successor 1 have a single predecessor.
;; TODO: Support the transform for this case.
;; Neither successor branches to the other (both return), and the expected
;; output keeps the conditional branch with ordinary loads and stores.
define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: if.end:
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%tobool = icmp ne i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
if.end:
store i32 1, ptr %q
ret void
if.then:
%0 = load i32, ptr %q
store i32 %0, ptr %p
ret void
}
;; Hoist 6 stores.
;; Six conditional stores are still converted; compare with @threshold_7,
;; where seven stores are left untouched.
define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
; CHECK-LABEL: @threshold_6(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 2>, ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 3>, ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 4>, ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 5>, ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 6>, ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
;; Each store writes a distinct constant to a distinct pointer argument.
if.true:
store i32 1, ptr %p1, align 4
store i32 2, ptr %p2, align 4
store i32 3, ptr %p3, align 4
store i32 4, ptr %p4, align 4
store i32 5, ptr %p5, align 4
store i32 6, ptr %p6, align 4
br label %if.false
if.false:
ret void
}
;; Do not hoist 7 stores.
;; Same shape as @threshold_6 with one additional store; the expected output
;; is the unchanged control flow with ordinary stores.
define void @threshold_7(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
; CHECK-LABEL: @threshold_7(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.true:
; CHECK-NEXT: store i32 1, ptr [[P1:%.*]], align 4
; CHECK-NEXT: store i32 2, ptr [[P2:%.*]], align 4
; CHECK-NEXT: store i32 3, ptr [[P3:%.*]], align 4
; CHECK-NEXT: store i32 4, ptr [[P4:%.*]], align 4
; CHECK-NEXT: store i32 5, ptr [[P5:%.*]], align 4
; CHECK-NEXT: store i32 6, ptr [[P6:%.*]], align 4
; CHECK-NEXT: store i32 7, ptr [[P7:%.*]], align 4
; CHECK-NEXT: br label [[IF_FALSE]]
; CHECK: if.false:
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.true:
store i32 1, ptr %p1, align 4
store i32 2, ptr %p2, align 4
store i32 3, ptr %p3, align 4
store i32 4, ptr %p4, align 4
store i32 5, ptr %p5, align 4
store i32 6, ptr %p6, align 4
store i32 7, ptr %p7, align 4
br label %if.false
if.false:
ret void
}
;; Do not hoist if the instructions to be hoisted are too expensive.
;; The expected output keeps the conditional branch and both successor blocks,
;; with ordinary loads and stores; only the two returns are merged.
define i32 @not_cheap_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
; CHECK-LABEL: @not_cheap_to_hoist(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[IF_TRUE]] ]
; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
; CHECK: if.false:
; CHECK-NEXT: store i64 1, ptr [[P:%.*]], align 8
; CHECK-NEXT: store i16 2, ptr [[Q:%.*]], align 2
; CHECK-NEXT: [[V:%.*]] = udiv i32 [[A]], 12345
; CHECK-NEXT: [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
; CHECK-NEXT: [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
; CHECK-NEXT: [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
; CHECK: if.true:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P]], align 4
; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%cond = icmp eq i32 %a, 0
br i1 %cond, label %if.true, label %if.false
;; Two stores followed by a udiv/mul/mul/select chain that feeds the return.
if.false:
store i64 1, ptr %p
store i16 2, ptr %q
%v = udiv i32 %a, 12345
%vv = mul i32 %v, %v0
%vvv = mul i32 %vv, %v1
%vvvv = select i1 %cc, i32 %v2, i32 %vvv
ret i32 %vvvv
if.true:
%0 = load i32, ptr %b
store i32 %0, ptr %p
br label %if.end
if.end:
ret i32 0
}
;; Do not hoist if there is more than 1 predecessor.
;; %if.then is reached from both %entry and %if.end, and %if.end is reached
;; from both %entry and %if.then; the function contains no return.
define void @not_single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.end:
; CHECK-NEXT: br label [[IF_THEN]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
br i1 %tobool, label %if.end, label %if.then
if.end:
br label %if.then
if.then:
%1 = load i32, ptr %q
store i32 %1, ptr %p
br label %if.end
}
;; Do not hoist because i8 is not supported by conditional faulting.
;; Same control-flow idea as the i32 tests but with i8 accesses; the expected
;; output is unchanged.
define void @not_supported_type(i8 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @not_supported_type(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.false:
; CHECK-NEXT: store i8 1, ptr [[Q:%.*]], align 1
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: if.true:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[B:%.*]], align 1
; CHECK-NEXT: store i8 [[TMP0]], ptr [[P:%.*]], align 1
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
%cond = icmp eq i8 %a, 0
br i1 %cond, label %if.true, label %if.false
if.false:
store i8 1, ptr %q
br label %if.end
if.true:
%0 = load i8, ptr %b
store i8 %0, ptr %p
br label %if.end
if.end:
ret void
}
;; Do not hoist if the terminator is not br.
;; The entry block ends in a switch instead of a conditional branch; the
;; expected output is unchanged.
define void @not_br_terminator(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @not_br_terminator(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[A:%.*]], label [[IF_END:%.*]] [
; CHECK-NEXT: i32 1, label [[IF_FALSE:%.*]]
; CHECK-NEXT: i32 2, label [[IF_TRUE:%.*]]
; CHECK-NEXT: ]
; CHECK: if.false:
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.true:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[IF_FALSE]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
switch i32 %a, label %if.end [
i32 1, label %if.false
i32 2, label %if.true
]
if.false:
store i32 1, ptr %q, align 4
br label %if.end
if.true:
%0 = load i32, ptr %b, align 4
store i32 %0, ptr %p, align 4
br label %if.false
if.end:
ret void
}
;; Do not hoist if the instruction to be hoisted is atomic.
;; The only conditional instruction is a seq_cst atomic store; the expected
;; output is unchanged.
define void @not_atomic(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_atomic(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.false:
; CHECK-NEXT: store atomic i32 1, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT: br label [[IF_TRUE]]
; CHECK: if.true:
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.false:
store atomic i32 1, ptr %p seq_cst, align 4
br label %if.true
if.true:
ret void
}
;; Do not hoist if the instruction to be hoisted is volatile.
;; The only conditional instruction is a volatile load whose result is unused;
;; the expected output is unchanged.
define void @not_volatile(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_volatile(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.false:
; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[IF_TRUE]]
; CHECK: if.true:
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.false:
%0 = load volatile i32, ptr %p, align 4
br label %if.true
if.true:
ret void
}
;; Do not hoist if there is an instruction that has side effects in the same BB.
;; The plain i32 store shares its block with an atomicrmw; the expected output
;; leaves both in place.
define void @not_hoistable_sideeffect(i1 %cond, ptr %p, ptr %q) {
; CHECK-LABEL: @not_hoistable_sideeffect(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.false:
; CHECK-NEXT: [[RMW:%.*]] = atomicrmw xchg ptr [[Q:%.*]], double 4.000000e+00 seq_cst, align 8
; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[IF_TRUE]]
; CHECK: if.true:
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.false:
;; The atomicrmw is the side-effecting instruction that blocks the transform.
%rmw= atomicrmw xchg ptr %q, double 4.0 seq_cst
store i32 1, ptr %p, align 4
br label %if.true
if.true:
ret void
}
;; Do not hoist if the branch is predictable and the `then` BB is not likely to execute.
;; The branch carries !prof !10 (branch_weights 1 vs. 99), with the weight of 1
;; on the edge to %if.then; the expected output keeps the branch and its !prof.
define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_likely_to_execute(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF6:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: ret void
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
br i1 %tobool, label %if.then, label %if.end, !prof !10
if.end:
ret void
if.then:
%0 = load i32, ptr %q
store i32 %0, ptr %p
br label %if.end
}
;; Currently the hoist-loads-stores-with-cond-faulting optimization runs in
;; codegen, i.e. after SROA has already optimized the allocas away, so the
;; transform is not needed for this case. This may change if the optimization
;; is moved before SROA in the future.
define void @not_alloca(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_alloca(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[Q_ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
; CHECK-NEXT: store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
; CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
;; Every argument is first spilled to its own stack slot.
%p.addr = alloca ptr
%q.addr = alloca ptr
%a.addr = alloca i32
store ptr %p, ptr %p.addr
store ptr %q, ptr %q.addr
store i32 %a, ptr %a.addr
%0 = load i32, ptr %a.addr
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.end
;; The conditional block reloads the pointers from the allocas before using them.
if.then:
%1 = load ptr, ptr %q.addr
%2 = load i32, ptr %1
%3 = load ptr, ptr %p.addr
store i32 %2, ptr %3
br label %if.end
if.end:
ret void
}
;; Do not transform if alignment = 2^32.
;; NOTE(review): presumably because the masked intrinsics take their alignment
;; as an i32 operand (as seen in the other tests of this file), which cannot
;; hold 4294967296 -- confirm against the pass implementation.
define void @not_maximum_alignment(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_maximum_alignment(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.true:
; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4294967296
; CHECK-NEXT: br label [[IF_FALSE]]
; CHECK: if.false:
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
if.true:
store i32 0, ptr %p, align 4294967296
br label %if.false
if.false:
ret void
}
;; The phi in the common successor has three incoming values.
;; The load guarded by %cond2 is expected to become a masked load in %bb1 with
;; a zero passthrough, leaving a two-input phi in %bb3; the outer branch on
;; %cond1 is kept.
define i32 @succ_phi_has_3input(i1 %cond1, ptr %p, i1 %cond2) {
; CHECK-LABEL: @succ_phi_has_3input(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
; CHECK-NEXT: store i64 [[Y]], ptr [[P]], align 8
; CHECK-NEXT: ret i32 0
;
entry:
br i1 %cond1, label %bb3, label %bb1
bb1: ; preds = %entry
br i1 %cond2, label %bb2, label %bb3
bb2: ; preds = %bb1
%x = load i64, ptr %p, align 8
br label %bb3
bb3: ; preds = %bb2, %bb1, %entry
;; Both non-loading edges (%bb1 and %entry) contribute the constant 0.
%y = phi i64 [ %x, %bb2 ], [ 0, %bb1 ], [ 0, %entry ]
store i64 %y, ptr %p, align 8
ret i32 0
}
;; Successor 1 branches to successor 0; the loaded value is used both by a
;; store in the same block and by a phi in the successor.
;; Expected: a masked load with the phi's other incoming value (0) as
;; passthrough, a masked store of the result, and a direct return of the load.
define i32 @succ1to0_phi2(ptr %p, ptr %p2) {
; CHECK-LABEL: @succ1to0_phi2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
entry:
%cond = icmp eq ptr %p, null
br i1 %cond, label %if.true, label %if.false
if.false:
%0 = load i32, ptr %p
store i32 %0, ptr %p2
br label %if.true
if.true:
%res = phi i32 [ %0, %if.false ], [ 0, %entry ]
ret i32 %res
}
;; Successor 1 branches to successor 0; the loaded value feeds two phis whose
;; other incoming values differ (0 and %x).
;; In the expected output %x is used as the masked load's passthrough, the phi
;; with incoming 0 is rewritten as a select on the condition, and the add's
;; second operand is the load result itself.
define i32 @succ1to0_phi3(ptr %p, ptr %p2, i32 %x) {
; CHECK-LABEL: @succ1to0_phi3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[X:%.*]] to <1 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP4]]
; CHECK-NEXT: [[RES:%.*]] = add i32 [[SPEC_SELECT]], [[TMP4]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
br i1 %cond, label %if.true, label %if.false
if.false:
%0 = load i32, ptr %p
store i32 %0, ptr %p2
br label %if.true
if.true:
;; Two phis over the same load with different values on the %entry edge.
%res0 = phi i32 [ %0, %if.false ], [ 0, %entry ]
%res1 = phi i32 [ %0, %if.false ], [ %x, %entry ]
%res = add i32 %res0, %res1
ret i32 %res
}
;; External helper called by @load_skip_speculatable_memory_read.
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
;; Module-level named metadata.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
!llvm.ident = !{!4}
;; Debug-info scaffolding (!0-!8) referenced by the llvm.dbg.assign call in
;; @debug_metadata_diassign.
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "clang")
!1 = !DIFile(filename: "foo.c", directory: "/tmp")
!2 = !{i32 2, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{!"clang"}
!5 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
!7 = !DILocation(line: 5, column: 7, scope: !6)
!8 = !DILocalVariable(name: "a", scope: !6, line: 6, type: !5)
;; Assignment ID shared by the store and the dbg.assign in
;; @debug_metadata_diassign.
!9 = distinct !DIAssignID()
;; Branch weights attached to the branch in @not_likely_to_execute.
!10 = !{!"branch_weights", i32 1, i32 99}
;; Annotation attached to a load and a store in @nondebug_metadata.
!11 = !{ !"auto-init" }