llvm/llvm/test/CodeGen/X86/x86-cmov-converter.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This test checks that the x86-cmov-converter optimization transforms CMOV
;; instructions into branches when it is profitable.
;; There are 5 cases below:
;;   1. CmovInHotPath:
;;        CMOV depends on the condition and it is in the hot path.
;;        Thus, it is worth transforming.
;;
;;   2. CmovNotInHotPath:
;;        Similar to test (1), except that CMOV is not in the hot path.
;;        Thus, it is not worth transforming.
;;
;;   3. MaxIndex:
;;        Maximum calculation algorithm that is looking for the max index,
;;        calculating CMOV value is cheaper than calculating CMOV condition.
;;        Thus, it is worth transforming.
;;
;;   4. MaxValue:
;;        Maximum calculation algorithm that is looking for the max value,
;;        calculating CMOV value is not cheaper than calculating CMOV condition.
;;        Thus, it is not worth transforming.
;;
;;   5. BinarySearch:
;;        Usually, the CMOV in a binary search is not predictable.
;;        Thus, it is not worth transforming.
;;
;; Test was created using the following command line:
;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
;; Where foo.c is:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i] + 1;
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = (c[i] + 1) * t;
;;  }
;;}
;;
;;
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;    d[i] /= b;
;;  }
;;}
;;
;;
;;int MaxIndex(int n, int *a) {
;;  int t = 0;
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > a[t])
;;      t = i;
;;  }
;;  return t;
;;}
;;
;;
;;int MaxValue(int n, int *a) {
;;  int t = a[0];
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > t)
;;      t = a[i];
;;  }
;;  return t;
;;}
;;
;;typedef struct Node Node;
;;struct Node {
;;  unsigned Val;
;;  Node *Right;
;;  Node *Left;
;;};
;;
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
;;  while (Curr->Val > Next->Val) {
;;    Curr = Next;
;;    if (Mask & (0x1 << Curr->Val))
;;      Next = Curr->Right;
;;    else
;;      Next = Curr->Left;
;;  }
;;  return Curr->Val;
;;}
;;
;;
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;  }
;;}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; IR counterpart of the C `struct Node` shown in the header comment:
; field 0 is Val, field 1 is Right, field 2 is Left.
%struct.Node = type { i32, ptr, ptr }

; Case 1 (hot path): the select result (%.) feeds the multiply whose product is
; stored back to c[i]. Both llc invocations expect the select to be emitted as
; a conditional branch (jg) around a register move instead of a cmov.
define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture readnone %d) #0 {
; CHECK-LABEL: CmovInHotPath:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle .LBB0_5
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:  .LBB0_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rcx,%rdi,4), %r10d
; CHECK-NEXT:    leal 1(%r10), %r8d
; CHECK-NEXT:    imull %esi, %r10d
; CHECK-NEXT:    movl $10, %r9d
; CHECK-NEXT:    cmpl %edx, %r10d
; CHECK-NEXT:    jg .LBB0_4
; CHECK-NEXT:  # %bb.3: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    movl %r8d, %r9d
; CHECK-NEXT:  .LBB0_4: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    imull %r8d, %r9d
; CHECK-NEXT:    movl %r9d, (%rcx,%rdi,4)
; CHECK-NEXT:    addq $1, %rdi
; CHECK-NEXT:    cmpq %rdi, %rax
; CHECK-NEXT:    jne .LBB0_2
; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: CmovInHotPath:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    testl %edi, %edi
; CHECK-FORCEALL-NEXT:    jle .LBB0_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
; CHECK-FORCEALL-NEXT:  .LBB0_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rcx,%rdi,4), %r10d
; CHECK-FORCEALL-NEXT:    leal 1(%r10), %r8d
; CHECK-FORCEALL-NEXT:    imull %esi, %r10d
; CHECK-FORCEALL-NEXT:    movl $10, %r9d
; CHECK-FORCEALL-NEXT:    cmpl %edx, %r10d
; CHECK-FORCEALL-NEXT:    jg .LBB0_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %r8d, %r9d
; CHECK-FORCEALL-NEXT:  .LBB0_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-FORCEALL-NEXT:    imull %r8d, %r9d
; CHECK-FORCEALL-NEXT:    movl %r9d, (%rcx,%rdi,4)
; CHECK-FORCEALL-NEXT:    addq $1, %rdi
; CHECK-FORCEALL-NEXT:    cmpq %rdi, %rax
; CHECK-FORCEALL-NEXT:    jne .LBB0_2
; CHECK-FORCEALL-NEXT:  .LBB0_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 0
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  ; The select under test: 10 when c[i] * a > b, otherwise c[i] + 1.
  %. = select i1 %cmp3, i32 10, i32 %add
  %mul7 = mul nsw i32 %., %add
  store i32 %mul7, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Case 2 (not in hot path): same condition as CmovInHotPath, but the select
; result is only stored, and the loop body also performs a signed division on
; d[i]. The default invocation expects the cmov to be kept (cmovgl); only the
; force-all invocation expects a branch (jg).
define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture %d) #0 {
; CHECK-LABEL: CmovNotInHotPath:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle .LBB1_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edx, %r9d
; CHECK-NEXT:    movl %edi, %edi
; CHECK-NEXT:    xorl %r10d, %r10d
; CHECK-NEXT:    movl $10, %r11d
; CHECK-NEXT:  .LBB1_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rcx,%r10,4), %eax
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    imull %esi, %edx
; CHECK-NEXT:    cmpl %r9d, %edx
; CHECK-NEXT:    cmovgl %r11d, %eax
; CHECK-NEXT:    movl %eax, (%rcx,%r10,4)
; CHECK-NEXT:    movl (%r8,%r10,4), %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %r9d
; CHECK-NEXT:    movl %eax, (%r8,%r10,4)
; CHECK-NEXT:    addq $1, %r10
; CHECK-NEXT:    cmpq %r10, %rdi
; CHECK-NEXT:    jne .LBB1_2
; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: CmovNotInHotPath:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    testl %edi, %edi
; CHECK-FORCEALL-NEXT:    jle .LBB1_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edx, %r9d
; CHECK-FORCEALL-NEXT:    movl %edi, %edi
; CHECK-FORCEALL-NEXT:    xorl %r10d, %r10d
; CHECK-FORCEALL-NEXT:  .LBB1_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rcx,%r10,4), %eax
; CHECK-FORCEALL-NEXT:    movl %eax, %r11d
; CHECK-FORCEALL-NEXT:    imull %esi, %r11d
; CHECK-FORCEALL-NEXT:    movl $10, %edx
; CHECK-FORCEALL-NEXT:    cmpl %r9d, %r11d
; CHECK-FORCEALL-NEXT:    jg .LBB1_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %eax, %edx
; CHECK-FORCEALL-NEXT:  .LBB1_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edx, (%rcx,%r10,4)
; CHECK-FORCEALL-NEXT:    movl (%r8,%r10,4), %eax
; CHECK-FORCEALL-NEXT:    cltd
; CHECK-FORCEALL-NEXT:    idivl %r9d
; CHECK-FORCEALL-NEXT:    movl %eax, (%r8,%r10,4)
; CHECK-FORCEALL-NEXT:    addq $1, %r10
; CHECK-FORCEALL-NEXT:    cmpq %r10, %rdi
; CHECK-FORCEALL-NEXT:    jne .LBB1_2
; CHECK-FORCEALL-NEXT:  .LBB1_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp18 = icmp sgt i32 %n, 0
  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  ; The select under test: 10 when c[i] * a > b, otherwise the loaded c[i].
  %. = select i1 %cmp3, i32 10, i32 %0
  store i32 %., ptr %arrayidx, align 4
  ; The division below takes its operands from d[i] and %b, not from the select.
  %arrayidx7 = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx7, align 4
  %div = sdiv i32 %1, %b
  store i32 %div, ptr %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Case 3: the select chooses between the current index and the best index so
; far, and the chosen index is used to address a[] in the next iteration. Both
; llc invocations expect a branch (jg) instead of a cmov.
define i32 @MaxIndex(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxIndex:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB2_5
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB2_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %r8d
; CHECK-NEXT:    movslq %edi, %r9
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl (%rsi,%r9,4), %r8d
; CHECK-NEXT:    jg .LBB2_4
; CHECK-NEXT:  # %bb.3: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:  .LBB2_4: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    movl %eax, %edi
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB2_2
; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxIndex:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jl .LBB2_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB2_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %r8d
; CHECK-FORCEALL-NEXT:    movslq %edi, %r9
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%r9,4), %r8d
; CHECK-FORCEALL-NEXT:    jg .LBB2_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:  .LBB2_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    movl %eax, %edi
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    jne .LBB2_2
; CHECK-FORCEALL-NEXT:  .LBB2_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  ; %t.015 carries the previous iteration's select result (best index so far).
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %2 = trunc i64 %indvars.iv to i32
  ; The select under test: current index when a[i] > a[t], otherwise t.
  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; If the cmov instruction is marked as unpredictable, do not convert it to a
; branch. The IR matches MaxIndex except for the !unpredictable metadata on the
; select; both llc invocations (including force-all) expect cmovgl to remain.
define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxIndex_unpredictable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB3_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB3_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-NEXT:    cltq
; CHECK-NEXT:    cmpl (%rsi,%rax,4), %edi
; CHECK-NEXT:    cmovgl %edx, %eax
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB3_2
; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jl .LBB3_3
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB3_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-FORCEALL-NEXT:    cltq
; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%rax,4), %edi
; CHECK-FORCEALL-NEXT:    cmovgl %edx, %eax
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    jne .LBB3_2
; CHECK-FORCEALL-NEXT:  .LBB3_3: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %2 = trunc i64 %indvars.iv to i32
  ; The !unpredictable annotation is the only difference from MaxIndex.
  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Case 4: the select keeps the running maximum value, choosing between the
; freshly loaded element and the previous maximum. The default invocation
; expects the cmov to be kept (cmovgl); the force-all invocation expects a
; branch (jg).
define i32 @MaxValue(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxValue:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB4_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB4_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-NEXT:    cmpl %eax, %edi
; CHECK-NEXT:    cmovgl %edi, %eax
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB4_2
; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxValue:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rsi), %r8d
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jge .LBB4_3
; CHECK-FORCEALL-NEXT:  # %bb.1:
; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
; CHECK-FORCEALL-NEXT:  .LBB4_2: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
; CHECK-FORCEALL-NEXT:  .LBB4_3: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB4_4: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %eax
; CHECK-FORCEALL-NEXT:    cmpl %r8d, %eax
; CHECK-FORCEALL-NEXT:    jg .LBB4_6
; CHECK-FORCEALL-NEXT:  # %bb.5: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
; CHECK-FORCEALL-NEXT:  .LBB4_6: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    movl %eax, %r8d
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    je .LBB4_2
; CHECK-FORCEALL-NEXT:    jmp .LBB4_4
entry:
  %0 = load i32, ptr %a, align 4
  %cmp13 = icmp sgt i32 %n, 1
  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx1, align 4
  %cmp2 = icmp sgt i32 %1, %t.014
  ; The select under test: a[i] when a[i] > t, otherwise the previous t.
  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Case 5: the loop selects between the addresses of the Left and Right fields
; of the current node and loads the next node through the selected address.
; In the expected output of both llc invocations there is no cmov and no
; additional branch for the select: the field is picked with btl/setae feeding
; a scaled-index load (8(%rdx,%rcx,8)).
define i32 @BinarySearch(i32 %Mask, ptr nocapture readonly %Curr, ptr nocapture readonly %Next) #0 {
; CHECK-LABEL: BinarySearch:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    jmp .LBB5_2
; CHECK-NEXT:  .LBB5_1: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    btl %eax, %edi
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
; CHECK-NEXT:  .LBB5_2: # %while.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rdx), %ecx
; CHECK-NEXT:    cmpl %ecx, %eax
; CHECK-NEXT:    ja .LBB5_1
; CHECK-NEXT:  # %bb.3: # %while.end
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: BinarySearch:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rsi), %eax
; CHECK-FORCEALL-NEXT:    jmp .LBB5_2
; CHECK-FORCEALL-NEXT:  .LBB5_1: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB5_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
; CHECK-FORCEALL-NEXT:    xorl %ecx, %ecx
; CHECK-FORCEALL-NEXT:    btl %eax, %edi
; CHECK-FORCEALL-NEXT:    setae %cl
; CHECK-FORCEALL-NEXT:    movq 8(%rdx,%rcx,8), %rdx
; CHECK-FORCEALL-NEXT:  .LBB5_2: # %while.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rdx), %ecx
; CHECK-FORCEALL-NEXT:    cmpl %ecx, %eax
; CHECK-FORCEALL-NEXT:    ja .LBB5_1
; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.end
; CHECK-FORCEALL-NEXT:    retq
entry:
  %0 = load i32, ptr %Curr, align 8
  %1 = load i32, ptr %Next, align 8
  %cmp10 = icmp ugt i32 %0, %1
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
  %Next.addr.011 = phi ptr [ %3, %while.body ], [ %Next, %entry ]
  %shl = shl i32 1, %2
  %and = and i32 %shl, %Mask
  %tobool = icmp eq i32 %and, 0
  %Left = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 2
  %Right = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 1
  ; The select under test picks a field address; the load below uses it.
  %Left.sink = select i1 %tobool, ptr %Left, ptr %Right
  %3 = load ptr, ptr %Left.sink, align 8
  %4 = load i32, ptr %3, align 8
  %cmp = icmp ugt i32 %2, %4
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
  ret i32 %.lcssa
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following test checks that the x86-cmov-converter optimization
;; correctly transforms CMOV instructions into branches.
;;
;; MBB:
;;   cond = cmp ...
;;   v1 = CMOVgt t1, f1, cond
;;   v2 = CMOVle v1, f2, cond
;;
;; Where: t1 = 11, f1 = 22, f2 = a
;;
;; After CMOV transformation
;; -------------------------
;; MBB:
;;   cond = cmp ...
;;   ja %SinkMBB
;;
;; FalseMBB:
;;   jmp %SinkMBB
;;
;; SinkMBB:
;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
;;                                          ; true-value with false-value
;;                                          ; Phi instruction cannot use
;;                                          ; previous Phi instruction result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Two selects in one block use opposite conditions (%cond is ugt, %condOpp is
; ule on the same operands), and the second select consumes the first one's
; result. Both llc invocations expect a single conditional branch (ja) with
; both values materialized in the fall-through block.
; NOTE(review): the entry test `icmp ugt i32 0, %n` can never be true, which
; matches the constant `movb $1, %al` / `testb` / `jne` sequence in the
; expected output; the function appears intended only for inspecting the
; shape of the generated code - confirm.
define void @Transform(ptr%arr, ptr%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
; CHECK-LABEL: Transform:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne .LBB6_5
; CHECK-NEXT:  # %bb.1: # %while.body.preheader
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:  .LBB6_2: # %while.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movslq %esi, %rsi
; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %ecx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    movl $11, %eax
; CHECK-NEXT:    movl %ecx, %r8d
; CHECK-NEXT:    cmpl %ecx, %edx
; CHECK-NEXT:    ja .LBB6_4
; CHECK-NEXT:  # %bb.3: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT:    movl $22, %eax
; CHECK-NEXT:    movl $22, %r8d
; CHECK-NEXT:  .LBB6_4: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %r8d
; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
; CHECK-NEXT:    addl $1, %esi
; CHECK-NEXT:    cmpl %r9d, %esi
; CHECK-NEXT:    ja .LBB6_2
; CHECK-NEXT:  .LBB6_5: # %while.end
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: Transform:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movb $1, %al
; CHECK-FORCEALL-NEXT:    testb %al, %al
; CHECK-FORCEALL-NEXT:    jne .LBB6_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %while.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edx, %ecx
; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
; CHECK-FORCEALL-NEXT:  .LBB6_2: # %while.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movslq %esi, %rsi
; CHECK-FORCEALL-NEXT:    movl (%rdi,%rsi,4), %eax
; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
; CHECK-FORCEALL-NEXT:    divl %ecx
; CHECK-FORCEALL-NEXT:    movl %eax, %edx
; CHECK-FORCEALL-NEXT:    movl $11, %eax
; CHECK-FORCEALL-NEXT:    movl %ecx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %ecx, %edx
; CHECK-FORCEALL-NEXT:    ja .LBB6_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl $22, %eax
; CHECK-FORCEALL-NEXT:    movl $22, %r8d
; CHECK-FORCEALL-NEXT:  .LBB6_4: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
; CHECK-FORCEALL-NEXT:    divl %r8d
; CHECK-FORCEALL-NEXT:    movl %edx, (%rdi,%rsi,4)
; CHECK-FORCEALL-NEXT:    addl $1, %esi
; CHECK-FORCEALL-NEXT:    cmpl %r9d, %esi
; CHECK-FORCEALL-NEXT:    ja .LBB6_2
; CHECK-FORCEALL-NEXT:  .LBB6_5: # %while.end
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp10 = icmp ugt i32 0, %n
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
  %arr_i = getelementptr inbounds i32, ptr %arr, i32 %i
  %x = load i32, ptr %arr_i, align 4
  %div = udiv i32 %x, %a
  %cond = icmp ugt i32 %div, %a
  %condOpp = icmp ule i32 %div, %a
  ; %s1 selects on %cond; %s2 selects on the opposite condition and takes %s1
  ; as its true operand.
  %s1 = select i1 %cond, i32 11, i32 22
  %s2 = select i1 %condOpp, i32 %s1, i32 %a
  %sum = urem i32 %s1, %s2
  store i32 %sum, ptr %arr_i, align 4
  %i_inc = add i32 %i, 1
  %cmp = icmp ugt i32 %i_inc, %n
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; Test that a cmov with a memory operand is always converted into a branch,
; even outside of a loop. Both llc invocations expect the load from %y to sit
; in the fall-through block that the `ja` skips.
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB7_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:  .LBB7_2: # %entry
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB7_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:  .LBB7_2: # %entry
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %load = load i32, ptr %y
  ; The loaded value is the false operand of the select.
  %z = select i1 %cond, i32 %x, i32 %load
  ret i32 %z
}

; If the cmov instruction is marked as unpredictable, do not convert it to a
; branch. The IR matches test_cmov_memoperand plus !unpredictable on the
; select; both llc invocations expect the cmov with a folded load
; (cmovbel (%rcx), %eax) to remain.
define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_unpredictable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    cmovbel (%rcx), %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    cmovbel (%rcx), %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %load = load i32, ptr %y
  %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand. All three selects share %cond; the expected output of both llc
; invocations has one branch (ja), with the load and the two register moves in
; the fall-through block.
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    movl %edx, %r8d
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB9_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %edx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %esi, %r8d
; CHECK-NEXT:  .LBB9_2: # %entry
; CHECK-NEXT:    addl %r8d, %eax
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB9_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
; CHECK-FORCEALL-NEXT:  .LBB9_2: # %entry
; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y = load i32, ptr %y.ptr
  ; Only %z2 takes the loaded value; %z1 and %z3 select between arguments.
  %z1 = select i1 %cond, i32 %x, i32 %a
  %z2 = select i1 %cond, i32 %x, i32 %y
  %z3 = select i1 %cond, i32 %x, i32 %b
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Same as test_cmov_memoperand_in_group but with the true/false operands of
; the selects swapped (the loaded value is now the true operand), so the
; expected branch in both llc invocations is jbe instead of ja.
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    movl %edx, %r8d
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    jbe .LBB10_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %edx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %esi, %r8d
; CHECK-NEXT:  .LBB10_2: # %entry
; CHECK-NEXT:    addl %r8d, %eax
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    jbe .LBB10_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
; CHECK-FORCEALL-NEXT:  .LBB10_2: # %entry
; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y = load i32, ptr %y.ptr
  %z2 = select i1 %cond, i32 %a, i32 %x
  %z1 = select i1 %cond, i32 %y, i32 %x
  %z3 = select i1 %cond, i32 %b, i32 %x
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Test that we don't convert a group of cmovs with conflicting directions of
; loads: %y1 is the false operand of its select while %y2 is the true operand
; of its select. Both llc invocations expect the two cmoval instructions to
; remain.
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, ptr %y1.ptr, ptr %y2.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:    cmoval %edx, %eax
; CHECK-NEXT:    cmoval (%r8), %edx
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:    cmoval %edx, %eax
; CHECK-FORCEALL-NEXT:    cmoval (%r8), %edx
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y1 = load i32, ptr %y1.ptr
  %y2 = load i32, ptr %y2.ptr
  %z1 = select i1 %cond, i32 %x, i32 %y1
  %z2 = select i1 %cond, i32 %y2, i32 %x
  %s1 = add i32 %z1, %z2
  ret i32 %s1
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov in
; the group. Both llc invocations expect a single branch (ja) whose
; fall-through block loads directly from %y (movl (%rcx), %eax).
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB12_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:  .LBB12_2: # %entry
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB12_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:  .LBB12_2: # %entry
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  ; %p is itself a select on %cond and supplies the address for the load.
  %p = select i1 %cond, ptr %x, ptr %y
  %load = load i32, ptr %p
  %z = select i1 %cond, i32 %a, i32 %load
  ret i32 %z
}

; Test that we can convert a group of two cmovs with memory operands where one
; uses the result of the other as part of the address. Both llc invocations
; expect a single branch (ja) whose fall-through block performs the two
; dependent loads back to back.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB13_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movq (%rcx), %rax
; CHECK-NEXT:    movl (%rax), %eax
; CHECK-NEXT:  .LBB13_2: # %entry
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB13_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movq (%rcx), %rax
; CHECK-FORCEALL-NEXT:    movl (%rax), %eax
; CHECK-FORCEALL-NEXT:  .LBB13_2: # %entry
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %load1 = load ptr, ptr %y
  ; %p selects the pointer loaded from %y as its false operand; %load2 then
  ; reads through %p.
  %p = select i1 %cond, ptr %x, ptr %load1
  %load2 = load i32, ptr %p
  %z = select i1 %cond, i32 %a, i32 %load2
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov and
; where that cmov gets *its* input from a prior cmov in the group. Both llc
; invocations expect a single branch (ja) whose fall-through block loads
; directly from %y (movl (%rcx), %eax).
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, ptr %x, ptr %y, ptr %z) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB14_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:  .LBB14_2: # %entry
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB14_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:  .LBB14_2: # %entry
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  ; Chain of selects on the same condition: %p feeds %p2, and %p2 is the
  ; address used by the load.
  %p = select i1 %cond, ptr %x, ptr %y
  %p2 = select i1 %cond, ptr %z, ptr %p
  %load = load i32, ptr %p2
  %r = select i1 %cond, i32 %a, i32 %load
  ret i32 %r
}

; External pointer globals loaded by @test_memoperand_loop below.
@begin = external global ptr
@end = external global ptr

; Test two chained pointer selects inside a loop. The first select takes its
; false value from a load of @begin that is performed inside the loop body
; (%begin_dup); the second takes its false value from %begin, which is loaded
; once in the entry block. Both RUN configurations expect each select to become
; a branch: the first falls through to a reload of @begin via the address kept
; in %rax, the second falls through to a register copy of %begin from %rcx.
define void @test_memoperand_loop(i32 %data) #0 {
; CHECK-LABEL: test_memoperand_loop:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq begin@GOTPCREL(%rip), %rax
; CHECK-NEXT:    movq (%rax), %rcx
; CHECK-NEXT:    movq end@GOTPCREL(%rip), %rdx
; CHECK-NEXT:    movq (%rdx), %rdx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    movq %rcx, %r8
; CHECK-NEXT:  .LBB15_1: # %loop.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    addq $8, %r8
; CHECK-NEXT:    cmpq %rdx, %r8
; CHECK-NEXT:    ja .LBB15_3
; CHECK-NEXT:  # %bb.2: # %loop.body
; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT:    movq (%rax), %r8
; CHECK-NEXT:  .LBB15_3: # %loop.body
; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT:    movl %edi, (%r8)
; CHECK-NEXT:    addq $8, %r8
; CHECK-NEXT:    cmpq %rdx, %r8
; CHECK-NEXT:    ja .LBB15_5
; CHECK-NEXT:  # %bb.4: # %loop.body
; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT:    movq %rcx, %r8
; CHECK-NEXT:  .LBB15_5: # %loop.body
; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT:    movl %edi, (%r8)
; CHECK-NEXT:    addl $1, %esi
; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
; CHECK-NEXT:    jl .LBB15_1
; CHECK-NEXT:  # %bb.6: # %exit
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_memoperand_loop:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movq begin@GOTPCREL(%rip), %rax
; CHECK-FORCEALL-NEXT:    movq (%rax), %rcx
; CHECK-FORCEALL-NEXT:    movq end@GOTPCREL(%rip), %rdx
; CHECK-FORCEALL-NEXT:    movq (%rdx), %rdx
; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
; CHECK-FORCEALL-NEXT:    movq %rcx, %r8
; CHECK-FORCEALL-NEXT:  .LBB15_1: # %loop.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    addq $8, %r8
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
; CHECK-FORCEALL-NEXT:    ja .LBB15_3
; CHECK-FORCEALL-NEXT:  # %bb.2: # %loop.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT:    movq (%rax), %r8
; CHECK-FORCEALL-NEXT:  .LBB15_3: # %loop.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edi, (%r8)
; CHECK-FORCEALL-NEXT:    addq $8, %r8
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
; CHECK-FORCEALL-NEXT:    ja .LBB15_5
; CHECK-FORCEALL-NEXT:  # %bb.4: # %loop.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT:    movq %rcx, %r8
; CHECK-FORCEALL-NEXT:  .LBB15_5: # %loop.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edi, (%r8)
; CHECK-FORCEALL-NEXT:    addl $1, %esi
; CHECK-FORCEALL-NEXT:    cmpl $1024, %esi # imm = 0x400
; CHECK-FORCEALL-NEXT:    jl .LBB15_1
; CHECK-FORCEALL-NEXT:  # %bb.6: # %exit
; CHECK-FORCEALL-NEXT:    retq
entry:
  ; Loop-invariant copies of the two global pointers.
  %begin = load ptr, ptr @begin, align 8
  %end = load ptr, ptr @end, align 8
  br label %loop.body
loop.body:
  %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
  %phi.ptr = phi ptr [ %begin, %entry ], [ %dst2, %loop.body ]
  ; Advance by two i32 elements (8 bytes) and compare against %end.
  %gep1 = getelementptr inbounds i32, ptr %phi.ptr, i64 2
  %cmp1 = icmp ugt ptr %gep1, %end
  ; @begin is loaded again inside the loop, so the false operand of the first
  ; select is a load result rather than the value loaded in the entry block.
  %begin_dup = load ptr, ptr @begin, align 8
  %dst1 = select i1 %cmp1, ptr %gep1, ptr %begin_dup
  store i32 %data, ptr %dst1, align 4
  ; Same advance-and-compare step, but the false operand of the second select
  ; is the loop-invariant %begin.
  %gep2 = getelementptr inbounds i32, ptr %dst1, i64 2
  %cmp2 = icmp ugt ptr %gep2, %end
  %dst2 = select i1 %cmp2, ptr %gep2, ptr %begin
  store i32 %data, ptr %dst2, align 4
  ; Exit once the incremented counter is no longer signed-less-than 1024.
  %iv.next = add i32 %phi.iv, 1
  %cond = icmp slt i32 %iv.next, 1024
  br i1 %cond, label %loop.body, label %exit
exit:
  ret void
}

; Attribute group #0, attached to the function definitions above: sets both
; "target-cpu" and "tune-cpu" to "x86-64".
attributes #0 = {"target-cpu"="x86-64" "tune-cpu"="x86-64"}
; Empty metadata node. NOTE(review): its users are not visible in this part of
; the file - confirm against the earlier tests before changing or removing it.
!0 = !{}