; llvm/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s -mtriple=x86_64-apple-darwin -o - | FileCheck %s --check-prefix OPT

; Teach CGP to dup returns to enable tail call optimization.
; rdar://9147433

define i32 @foo(i32 %x) nounwind ssp {
; CHECK-LABEL: foo:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    decl %edi
; CHECK-NEXT:    cmpl $5, %edi
; CHECK-NEXT:    ja LBB0_8
; CHECK-NEXT:  ## %bb.1: ## %entry
; CHECK-NEXT:    leaq LJTI0_0(%rip), %rax
; CHECK-NEXT:    movslq (%rax,%rdi,4), %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    jmpq *%rcx
; CHECK-NEXT:  LBB0_2: ## %sw.bb
; CHECK-NEXT:    jmp _f1 ## TAILCALL
; CHECK-NEXT:  LBB0_6: ## %sw.bb7
; CHECK-NEXT:    jmp _f5 ## TAILCALL
; CHECK-NEXT:  LBB0_4: ## %sw.bb3
; CHECK-NEXT:    jmp _f3 ## TAILCALL
; CHECK-NEXT:  LBB0_5: ## %sw.bb5
; CHECK-NEXT:    jmp _f4 ## TAILCALL
; CHECK-NEXT:  LBB0_3: ## %sw.bb1
; CHECK-NEXT:    jmp _f2 ## TAILCALL
; CHECK-NEXT:  LBB0_7: ## %sw.bb9
; CHECK-NEXT:    jmp _f6 ## TAILCALL
; CHECK-NEXT:  LBB0_8: ## %return
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:    .data_region jt32
; CHECK-NEXT:  .set L0_0_set_2, LBB0_2-LJTI0_0
; CHECK-NEXT:  .set L0_0_set_3, LBB0_3-LJTI0_0
; CHECK-NEXT:  .set L0_0_set_4, LBB0_4-LJTI0_0
; CHECK-NEXT:  .set L0_0_set_5, LBB0_5-LJTI0_0
; CHECK-NEXT:  .set L0_0_set_6, LBB0_6-LJTI0_0
; CHECK-NEXT:  .set L0_0_set_7, LBB0_7-LJTI0_0
; CHECK-NEXT:  LJTI0_0:
; CHECK-NEXT:    .long L0_0_set_2
; CHECK-NEXT:    .long L0_0_set_3
; CHECK-NEXT:    .long L0_0_set_4
; CHECK-NEXT:    .long L0_0_set_5
; CHECK-NEXT:    .long L0_0_set_6
; CHECK-NEXT:    .long L0_0_set_7
; CHECK-NEXT:    .end_data_region
entry:
  switch i32 %x, label %return [
    i32 1, label %sw.bb
    i32 2, label %sw.bb1
    i32 3, label %sw.bb3
    i32 4, label %sw.bb5
    i32 5, label %sw.bb7
    i32 6, label %sw.bb9
  ]

sw.bb:                                            ; preds = %entry
  %call = tail call i32 @f1() nounwind
  br label %return

sw.bb1:                                           ; preds = %entry
  %call2 = tail call i32 @f2() nounwind
  br label %return

sw.bb3:                                           ; preds = %entry
  %call4 = tail call i32 @f3() nounwind
  br label %return

sw.bb5:                                           ; preds = %entry
  %call6 = tail call i32 @f4() nounwind
  br label %return

sw.bb7:                                           ; preds = %entry
  %call8 = tail call i32 @f5() nounwind
  br label %return

sw.bb9:                                           ; preds = %entry
  %call10 = tail call i32 @f6() nounwind
  br label %return

return:                                           ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
  %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ]
  ret i32 %retval.0
}

; External callees for the six switch cases of @foo; each takes no arguments
; and returns i32.
declare i32 @f1()

declare i32 @f2()

declare i32 @f3()

declare i32 @f4()

declare i32 @f5()

declare i32 @f6()

; rdar://11958338
; NOTE(review): the named type %0 is not referenced by anything visible in this
; file; presumably a leftover from an earlier form of the test -- confirm
; before removing.
%0 = type opaque

; Callee for @thingWithValue.
declare ptr @bar(ptr) uwtable optsize noinline ssp

; thingWithValue: the returned phi takes undef from %if.then and the result of
; the tail call to @bar from %if.else. The expected assembly below reaches
; _bar through a conditional jump marked TAILCALL and otherwise just returns.
define hidden ptr @thingWithValue(ptr %self) uwtable ssp {
; CHECK-LABEL: thingWithValue:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je _bar ## TAILCALL
; CHECK-NEXT:  ## %bb.1: ## %someThingWithValue.exit
; CHECK-NEXT:    retq
entry:
  ; The branch condition is undef; the expected assembly materialises it as a
  ; zeroed %al followed by a test.
  br i1 undef, label %if.then.i, label %if.else.i

if.then.i:                                        ; preds = %entry
  br label %someThingWithValue.exit

if.else.i:                                        ; preds = %entry
  %call4.i = tail call ptr @bar(ptr undef) optsize
  br label %someThingWithValue.exit

someThingWithValue.exit:                          ; preds = %if.else.i, %if.then.i
  %retval.0.in.i = phi ptr [ undef, %if.then.i ], [ %call4.i, %if.else.i ]
  ret ptr %retval.0.in.i
}


; Correctly handle zext returns.
; Callee for @zext_i1; its i1 result carries the zeroext attribute, matching
; the return attribute of the caller.
declare zeroext i1 @foo_i1()

; zext_i1: when %k is true the function returns false directly; otherwise it
; returns the result of the tail call to @foo_i1. Both the caller's return and
; the callee's return are `zeroext i1`. The expected assembly below reaches
; _foo_i1 through a conditional jump marked TAILCALL.
define zeroext i1 @zext_i1(i1 %k) {
; CHECK-LABEL: zext_i1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    testb $1, %dil
; CHECK-NEXT:    je _foo_i1 ## TAILCALL
; CHECK-NEXT:  ## %bb.1: ## %land.end
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
entry:
  br i1 %k, label %land.end, label %land.rhs

land.rhs:                                         ; preds = %entry
  %call1 = tail call zeroext i1 @foo_i1()
  br label %land.end

land.end:                                         ; preds = %entry, %land.rhs
  ; false on the direct edge from %entry, the call result otherwise.
  %0 = phi i1 [ false, %entry ], [ %call1, %land.rhs ]
  ret i1 %0
}

; We need to look through bitcasts when looking for tail calls in phi incoming
; values.
declare ptr @g_ret32()
; f_ret8: returns %obj unchanged when it is null, otherwise the result of the
; tail call to @g_ret32. This is the only function in the file with IR-level
; expectations from the codegenprepare run: they require %if.then to end in
; its own `ret` of the call result and %return to `ret` %obj. The assembly
; expectations require a conditional jump to _g_ret32 marked TAILCALL.
; NOTE(review): the IR as written contains no bitcast (every value is `ptr`),
; so the "look through bitcasts" remark preceding the declaration presumably
; predates opaque pointers -- confirm.
define ptr @f_ret8(ptr %obj) nounwind {
; OPT-LABEL: @f_ret8(
; OPT-NEXT:  entry:
; OPT-NEXT:    [[CMP:%.*]] = icmp eq ptr [[OBJ:%.*]], null
; OPT-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_THEN:%.*]]
; OPT:       if.then:
; OPT-NEXT:    [[PTR:%.*]] = tail call ptr @g_ret32()
; OPT-NEXT:    ret ptr [[PTR]]
; OPT:       return:
; OPT-NEXT:    ret ptr [[OBJ]]
;
; CHECK-LABEL: f_ret8:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    jne _g_ret32 ## TAILCALL
; CHECK-NEXT:  ## %bb.1: ## %return
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq ptr %obj, null
  br i1 %cmp, label %return, label %if.then

if.then:
  %ptr = tail call ptr @g_ret32()
  br label %return

return:
  ; Call result on the %if.then edge, the incoming argument on the null path.
  %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
  ret ptr %retval
}

; memset_tailc: the function always returns %ret_val, which is also the
; destination operand of the memset intrinsic called on the non-null path.
; The intrinsic itself is void in IR; the expected assembly below nevertheless
; ends %if.then in `jmp _memset ## TAILCALL` (presumably relying on the
; library memset returning its destination argument -- confirm).
define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
; CHECK-LABEL: memset_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je LBB4_1
; CHECK-NEXT:  ## %bb.2: ## %if.then
; CHECK-NEXT:    movq %rsi, %rdx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    jmp _memset ## TAILCALL
; CHECK-NEXT:  LBB4_1: ## %return
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq ptr %ret_val, null
  br i1 %cmp, label %return, label %if.then

if.then:
  ; Fill %sz bytes at %ret_val with 0.
  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false)
  br label %return

return:
  ; No phi here: the same argument is returned on both paths.
  ret ptr %ret_val
}

; memcpy_tailc: when %sz is zero the function returns %src; otherwise it calls
; the memcpy intrinsic and returns %ret_val, the destination operand of that
; call. The expected assembly below ends %if.then in `jmp _memcpy ## TAILCALL`
; after shuffling the arguments into place.
define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
; CHECK-LABEL: memcpy_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    je LBB5_1
; CHECK-NEXT:  ## %bb.2: ## %if.then
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    movq %rdx, %rsi
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    jmp _memcpy ## TAILCALL
; CHECK-NEXT:  LBB5_1: ## %return
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq i64 %sz, 0
  br i1 %cmp, label %return, label %if.then

if.then:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false)
  br label %return

return:
  ; The value on the %if.then edge is the memcpy destination; the value on the
  ; %entry edge is unrelated to any call.
  %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
  ret ptr %phi
}

; strcpy_legal_and_baz_illegal: two call blocks feed one returned phi.
;  - %if.then calls @strcpy and the phi returns %strcpy_ret_val, the first
;    argument of that call; @strcpy is declared in this file with `returned`
;    on its first parameter. Expected assembly: `jmp _strcpy ## TAILCALL`.
;  - %if.else calls @baz and the phi returns %arg, the first argument of that
;    call; @baz is declared without any `returned` parameter. Expected
;    assembly: an ordinary `callq _baz`, then %rax is loaded from the saved
;    copy of %arg before `retq`.
define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
; CHECK-LABEL: strcpy_legal_and_baz_illegal:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdx, %rbx
; CHECK-NEXT:    movq %rsi, %r15
; CHECK-NEXT:    movq %rdi, %r14
; CHECK-NEXT:    movq %rsi, %rdi
; CHECK-NEXT:    callq _malloc
; CHECK-NEXT:    testq %r15, %r15
; CHECK-NEXT:    je LBB6_1
; CHECK-NEXT:  ## %bb.2: ## %if.then
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    movq %rbx, %rsi
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    jmp _strcpy ## TAILCALL
; CHECK-NEXT:  LBB6_1: ## %if.else
; CHECK-NEXT:    movq %r14, %rdi
; CHECK-NEXT:    movq %rbx, %rsi
; CHECK-NEXT:    callq _baz
; CHECK-NEXT:    movq %r14, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    retq
entry:
  ; The malloc result is the strcpy destination and one of the phi inputs.
  %strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz)
  %cmp = icmp eq i64 %sz, 0
  br i1 %cmp, label %if.else, label %if.then

if.then:
  %rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %strcpy_ret_val, ptr dereferenceable(1) %2)
  br label %return

if.else:
  %rv_unused_2 = tail call ptr @baz(ptr %arg, ptr %2)
  br label %return

return:
  ; Neither call result is used directly; the phi returns the first argument
  ; that was passed to whichever call ran.
  %phi = phi ptr [ %strcpy_ret_val, %if.then ], [ %arg, %if.else ]
  ret ptr %phi
}

; baz_illegal_tailc: negative case. The function returns %ret_val, which is the
; first argument passed to @baz, but @baz is declared in this file without a
; `returned` parameter and its result %rv is unused. The expected assembly
; below keeps an ordinary `callq _baz` and returns the saved copy of %ret_val.
define ptr @baz_illegal_tailc(ptr %ret_val, ptr %arg) nounwind {
; CHECK-LABEL: baz_illegal_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    je LBB7_2
; CHECK-NEXT:  ## %bb.1: ## %if.then
; CHECK-NEXT:    movq %rbx, %rdi
; CHECK-NEXT:    callq _baz
; CHECK-NEXT:  LBB7_2: ## %return
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq ptr %ret_val, null
  br i1 %cmp, label %return, label %if.then

if.then:
  %rv = tail call ptr @baz(ptr %ret_val, ptr %arg)
  br label %return

return:
  ret ptr %ret_val
}

; memset_illegal_tailc: negative case. The memset intrinsic writes through
; %arg, but the value returned on the %if.then edge is %ret_val_2, a different
; argument. The expected assembly below keeps an ordinary `callq _memset` and
; moves the saved %ret_val_2 into %rax afterwards.
define ptr @memset_illegal_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) nounwind {
; CHECK-LABEL: memset_illegal_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    je LBB8_2
; CHECK-NEXT:  ## %bb.1: ## %if.then
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rcx, %rbx
; CHECK-NEXT:    movq %rsi, %rdx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    callq _memset
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:  LBB8_2: ## %return
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq i64 %sz, 0
  br i1 %cmp, label %return, label %if.then

if.then:
  tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false)
  br label %return

return:
  ; Neither incoming value is the memset destination %arg.
  %phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ]
  ret ptr %phi
}

; strcpy_illegal_tailc: negative case. The function returns %src, which is
; passed as the second argument of @strcpy, whereas the declaration of @strcpy
; in this file puts `returned` on the first parameter. The expected assembly
; below keeps an ordinary `callq _strcpy` and returns the saved copy of %src.
define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src) nounwind {
; CHECK-LABEL: strcpy_illegal_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rdx, %rbx
; CHECK-NEXT:    testq %rsi, %rsi
; CHECK-NEXT:    je LBB9_2
; CHECK-NEXT:  ## %bb.1: ## %if.then
; CHECK-NEXT:    movq %rbx, %rsi
; CHECK-NEXT:    callq _strcpy
; CHECK-NEXT:  LBB9_2: ## %return
; CHECK-NEXT:    movq %rbx, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
entry:
  %cmp = icmp eq i64 %sz, 0
  br i1 %cmp, label %return, label %if.then

if.then:
  ; The call result is not used.
  %6 = tail call ptr @strcpy(ptr dereferenceable(1) %dest, ptr dereferenceable(1) %src)
  br label %return

return:
  ret ptr %src
}

; Global loaded by @undef_tailc and @undef_and_known_tailc to pick a path.
@i = global i32 0, align 4

; undef_tailc: the function returns undef on every path, and the result of the
; call to @qux on the non-zero path is unused. The expected assembly below
; reaches _qux through a conditional jump marked TAILCALL.
define i32 @undef_tailc() nounwind {
; CHECK-LABEL: undef_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    cmpl $0, _i(%rip)
; CHECK-NEXT:    jne _qux ## TAILCALL
; CHECK-NEXT:  ## %bb.1: ## %return
; CHECK-NEXT:    retq
entry:
  %val = load i32, ptr @i, align 4
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %return, label %if.then

if.then:
  %rv_unused = tail call i32 @qux()
  br label %return

return:
  ret i32 undef
}

; undef_and_known_tailc: the returned phi mixes undef with a real call result.
;  - %case_5 calls @qux and the phi returns that result.
;  - %case_2 calls @quux, ignores the result, and the phi returns undef.
;  - The default edge from %entry also contributes undef.
; The expected assembly below reaches both _qux and _quux through conditional
; jumps marked TAILCALL.
define i32 @undef_and_known_tailc() nounwind {
; CHECK-LABEL: undef_and_known_tailc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movl _i(%rip), %eax
; CHECK-NEXT:    cmpl $5, %eax
; CHECK-NEXT:    je _qux ## TAILCALL
; CHECK-NEXT:  ## %bb.1: ## %entry
; CHECK-NEXT:    cmpl $2, %eax
; CHECK-NEXT:    je _quux ## TAILCALL
; CHECK-NEXT:  ## %bb.2: ## %return
; CHECK-NEXT:    retq
entry:
  %val = load i32, ptr @i, align 4
  switch i32 %val, label %return [
    i32 2, label %case_2
    i32 5, label %case_5
  ]

case_2:
  %rv_unused = tail call i32 @quux()
  br label %return

case_5:
  %rv = tail call i32 @qux()
  br label %return

return:
  %phi = phi i32 [ undef, %case_2 ], [ %rv, %case_5 ], [ undef, %entry ]
  ret i32 %phi
}

; Memory intrinsics used by the memset_* and memcpy_* tests; both return void.
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
; Allocator called by @strcpy_legal_and_baz_illegal.
declare noalias ptr @malloc(i64)
; @strcpy marks its first parameter `returned`; @baz has the same ptr-returning
; shape but no `returned` parameter. The strcpy_* and baz_* tests differ on
; exactly this point.
declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
declare ptr @baz(ptr, ptr)
; i32-returning callees for the undef_* tests.
declare i32 @qux()
declare i32 @quux()