; llvm/llvm/test/Transforms/SimplifyCFG/speculate-store.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s

; Positive test: entry stores %B to %A, and if.then conditionally stores %C to
; the same pointer. The expected output has no branch: the value to store is
; chosen with a select on %cmp (%C when true, %B otherwise) and stored
; unconditionally. The !prof attachment of the original branch is expected to
; appear on the select.
define void @ifconvertstore(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @ifconvertstore(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[C:%.*]], i32 [[B]], !prof [[PROF0:![0-9]+]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[A]], align 4
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end, !prof !0

; Make sure we speculate stores like the following one. It is cheap compared to
; a mispredicted branch.
if.then:
  store i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}

; Store to a different location.
; Negative test: entry stores to %A1, but the conditional store in if.then
; writes to %A2. The expected output keeps the conditional branch and leaves
; the second store in if.then.

define void @noifconvertstore1(ptr %A1, ptr %A2, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A1:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A2:%.*]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  store i32 %B, ptr %A1
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A2
  br label %ret.end

ret.end:
  ret void
}

; External function with no body in this module, called by @noifconvertstore2.
; This function could store to our address, so we can't repeat the first store a second time.
declare void @unknown_fun()

; Negative test: a call to the opaque @unknown_fun sits between the first
; store to %A and the conditional store to %A. The expected output keeps the
; conditional branch and leaves the second store in if.then.
define void @noifconvertstore2(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    call void @unknown_fun()
; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  call void @unknown_fun()
  %cmp6 = icmp sgt i32 %D, 42
  br i1 %cmp6, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}

; Make sure we don't speculate volatile stores.
; Same shape as @ifconvertstore, except that the conditional store is marked
; volatile. The expected output keeps the conditional branch and the volatile
; store stays in if.then.

define void @noifconvertstore_volatile(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store volatile i32 [[C:%.*]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  %cmp6 = icmp sgt i32 %D, 42
  br i1 %cmp6, label %if.then, label %ret.end

if.then:
  store volatile i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}


;; Speculate a store, preceded by a local, non-escaping load
;; %a is an alloca that is never passed to a call in this function. The value
;; loaded from %arrayidx is compared against %b, and if.then stores %b back to
;; the same address. The expected output replaces the branch with a select
;; between %b and the loaded value, followed by an unconditional store.
define i32 @load_before_store_noescape(i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP0]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}

;; Don't speculate a store, preceded by a local, escaping load
;; Same body as @load_before_store_noescape, except that %a is passed to
;; @fork_some_threads before the load and @join_some_threads is called before
;; returning. The expected output keeps the conditional branch and the store in
;; if.then.
;; NOTE(review): presumably the concern is that other threads may access %a
;; once it has escaped - confirm against the SimplifyCFG implementation.
define i32 @load_before_store_escape(i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_escape(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    call void @fork_some_threads(ptr [[A]])
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    call void @join_some_threads()
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  call void @fork_some_threads(ptr %a)
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  call void @join_some_threads()
  ret i32 %add
}

;; Variant of the load-before-store pattern where the underlying object is a
;; byval argument instead of an alloca. %a is not passed to any call. The
;; expected output replaces the branch with a select between %b and the loaded
;; value, followed by an unconditional store.
define i64 @load_before_store_noescape_byval(ptr byval([2 x i32]) %a, i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape_byval(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

; Allocation function with a noalias return value; it provides the underlying
; object in @load_before_store_noescape_malloc.
declare noalias ptr @malloc(i64 %size)

;; Variant of the load-before-store pattern where the underlying object is the
;; result of a call to @malloc (declared with a noalias return). The pointer is
;; not passed to any other call. The expected output replaces the branch with a
;; select between %b and the loaded value, followed by an unconditional store.
define i64 @load_before_store_noescape_malloc(i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape_malloc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 8)
; CHECK-NEXT:    store i64 -1, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  %a = call ptr @malloc(i64 8)
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Variant of the load-before-store pattern where the underlying object is a
;; plain pointer argument carrying noalias, writable and dereferenceable(8).
;; The access uses the constant index 1 (bytes 4..7 of the 8 dereferenceable
;; bytes); the %i parameter is unused. The expected output replaces the branch
;; with a select between %b and the loaded value, followed by an unconditional
;; store.
define i64 @load_before_store_noescape_writable(ptr noalias writable dereferenceable(8) %a, i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape_writable(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Negative counterpart of @load_before_store_noescape_writable: the body is
;; the same, but %a lacks the noalias attribute (writable and
;; dereferenceable(8) are still present). The expected output keeps the
;; conditional branch and the store in if.then.
define i64 @load_before_store_noescape_writable_missing_noalias(ptr writable dereferenceable(8) %a, i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape_writable_missing_noalias(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Negative counterpart of @load_before_store_noescape_writable: the body is
;; the same, but %a lacks the dereferenceable attribute (noalias and writable
;; are still present). The expected output keeps the conditional branch and
;; the store in if.then.
define i64 @load_before_store_noescape_writable_missing_derefable(ptr noalias writable %a, i64 %i, i32 %b)  {
; CHECK-LABEL: @load_before_store_noescape_writable_missing_derefable(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

; External functions with no body in this module, called by
; @load_before_store_escape. @fork_some_threads receives the pointer to the
; local array, which is what makes that array escape.
declare void @fork_some_threads(ptr);
declare void @join_some_threads();

; Don't speculate if it's not the only instruction in the block (not counting
; the terminator)
; Same setup as @load_before_store_noescape, but if.then contains a second
; store (to %a) after the store to %arrayidx. The expected output keeps the
; conditional branch and both stores in if.then.
define i32 @not_alone_in_block(i64 %i, i32 %b)  {
; CHECK-LABEL: @not_alone_in_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    store i32 [[B]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  store i32 %b, ptr %a, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}

; Negative test: the unconditional store in entry is align 4, while the
; conditional store to the same pointer in if.then is align 8. The expected
; output keeps the conditional branch and leaves the align 8 store in if.then.
; NOTE(review): presumably the earlier align 4 access does not justify
; executing an align 8 store unconditionally - confirm against the
; SimplifyCFG implementation.
define void @wrong_align_store(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @wrong_align_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  store i32 %B, ptr %A, align 4
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A, align 8
  br label %ret.end

ret.end:
  ret void
}

; Negative test: %A is an alloca with align 4 and the preceding load from it is
; align 4, while the conditional store to %A in if.then is align 8. The
; expected output keeps the conditional branch and leaves the align 8 store in
; if.then.
define void @wrong_align_load(i32 %C, i32 %D) {
; CHECK-LABEL: @wrong_align_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  %A = alloca i32, align 4
  ; The loaded value is unused; the result is left unnamed.
  load i32, ptr %A, align 4
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A, align 8
  br label %ret.end

ret.end:
  ret void
}

; Branch weights attached to the conditional branch in @ifconvertstore. The
; assertion below reuses the PROF0 variable captured on the speculated select,
; so the node referenced there and the node defined here must be the same one,
; whatever number the printer assigns to it.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 3, i32 5}
!0 = !{!"branch_weights", i32 3, i32 5}