llvm/llvm/test/Transforms/MemCpyOpt/capturing-func.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt < %s -passes='require<globals-aa>,memcpyopt' -S -verify-memoryssa | FileCheck %s

; Minimal data layout: "e" selects little-endian; everything else is left at
; its default.
target datalayout = "e"

; External callee used by most tests. Its pointer parameter carries no
; attributes (in particular no nocapture), so nothing in this file rules out
; that it captures the pointer it is given.
declare void @foo(ptr)
; memcpy variant with an i32 length; both pointer operands are nocapture.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
; Lifetime markers, used by the test_lifetime_* functions below.
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)

; Check that the transformation isn't applied if the called function can
; capture the pointer argument (i.e. the nocapture attribute isn't present).
;
; Baseline case: @foo is called on %ptr2, one byte is copied from %ptr2 to
; %ptr1, and @foo is called again on %ptr1. The expected output is identical
; to the input: the first call still receives %ptr2 and the memcpy remains.
define void @test() {
; CHECK-LABEL: define {{[^@]+}}@test() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  call void @foo(ptr %ptr1)
  ret void
}

; Same as previous test, but with [2 x i8] allocas and a 2-byte copy instead of
; single-byte ones. The expected output is again identical to the input.
; NOTE(review): the function name refers to a bitcasted argument, but no
; bitcast appears in the body any more -- presumably a leftover from before
; the test was converted to opaque pointers; confirm before renaming.
define void @test_bitcast() {
; CHECK-LABEL: define {{[^@]+}}@test_bitcast() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 2, i1 false)
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca [2 x i8]
  %ptr2 = alloca [2 x i8]
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 2, i1 false)
  call void @foo(ptr %ptr1)
  ret void
}

; Lifetime of %ptr2 ends before the potential use of the capture in the second
; call.
;
; Expected output: the first @foo call is rewritten to take %ptr1 directly and
; the memcpy is removed. Both lifetime markers on %ptr2 are kept, and the
; second @foo call on %ptr1 is unchanged.
define void @test_lifetime_end() {
; CHECK-LABEL: define {{[^@]+}}@test_lifetime_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1, ptr [[PTR2]])
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 1, ptr [[PTR2]])
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @llvm.lifetime.start.p0(i64 1, ptr %ptr2)
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  call void @llvm.lifetime.end.p0(i64 1, ptr %ptr2)
  call void @foo(ptr %ptr1)
  ret void
}

; Lifetime of %ptr2 does not end, because of size mismatch.
;
; Differs from @test_lifetime_end only in the lifetime.end marker, which is
; given size 0 while the alloca and the lifetime.start marker use size 1.
; Expected output: identical to the input (first call still takes %ptr2, the
; memcpy remains).
define void @test_lifetime_not_end() {
; CHECK-LABEL: define {{[^@]+}}@test_lifetime_not_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 1, ptr [[PTR2]])
; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 0, ptr [[PTR2]])
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @llvm.lifetime.start.p0(i64 1, ptr %ptr2)
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  call void @llvm.lifetime.end.p0(i64 0, ptr %ptr2)
  call void @foo(ptr %ptr1)
  ret void
}

; Lifetime of %ptr2 ends before any potential use of the capture because we
; return from the function.
;
; There is no instruction between the memcpy and the return. Expected output:
; the @foo call is rewritten to take %ptr1 and the memcpy is removed; the
; now-unused %ptr2 alloca is still present.
define void @test_function_end() {
; CHECK-LABEL: define {{[^@]+}}@test_function_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  ret void
}

; A potential use of the capture occurs in a later block, can't be optimized.
;
; The memcpy is followed by an unconditional branch to %next, where @foo is
; called on %ptr1. Expected output: identical to the input.
define void @test_terminator() {
; CHECK-LABEL: define {{[^@]+}}@test_terminator() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    br label [[NEXT:%.*]]
; CHECK:       next:
; CHECK-NEXT:    call void @foo(ptr [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  br label %next

next:
  call void @foo(ptr %ptr1)
  ret void
}

; This case can be optimized, but would require a scan across multiple blocks
; and is currently not performed.
;
; Same shape as @test_terminator, except that %next contains only the return,
; so nothing after the memcpy uses either pointer. The expected output
; documents the current (unoptimized) result: identical to the input.
define void @test_terminator2() {
; CHECK-LABEL: define {{[^@]+}}@test_terminator2() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(ptr [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    br label [[NEXT:%.*]]
; CHECK:       next:
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(ptr %ptr2)
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  br label %next

next:
  ret void
}

; External callee whose pointer parameter is not marked nocapture; used by
; @test_dest_captured_before_alloca to capture the copy destination.
declare void @capture(ptr)

; This case should not be optimized, because dest is captured before the call.
;
; The destination %ptr1 is passed to @capture first; only then is @foo called
; on the source %ptr2 (with a call-site argmemonly attribute) and the byte
; copied over. Expected output: identical to the input, with the call-site
; attribute printed as an attribute-group reference.
define void @test_dest_captured_before_alloca() {
; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_alloca() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @capture(ptr [[PTR1]])
; CHECK-NEXT:    call void @foo(ptr [[PTR2]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1]], ptr [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @capture(ptr %ptr1)
  call void @foo(ptr %ptr2) argmemonly
  call void @llvm.memcpy.p0.p0.i32(ptr %ptr1, ptr %ptr2, i32 1, i1 false)
  ret void
}


; One-byte internal global: the memcpy destination in
; @test_dest_captured_before_global and the address @icmp_g compares against.
@g = internal global i8 0

; This case should not be optimized, because @g is captured before the call
; (being a global) and @icmp_g might depend on its identity.
;
; Concretely: @icmp_g stores 1 through its argument when the argument equals
; @g and 2 otherwise. As written it receives the alloca, so 2 is stored and
; then copied into @g; passing @g directly would store 1 instead.
; Expected output: identical to the input.
define void @test_dest_captured_before_global() {
; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_global() {
; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @icmp_g(ptr [[PTR]])
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr @g, ptr [[PTR]], i32 1, i1 false)
; CHECK-NEXT:    ret void
;
  %ptr = alloca i8
  call void @icmp_g(ptr %ptr)
  call void @llvm.memcpy.p0.p0.i32(ptr @g, ptr %ptr, i32 1, i1 false)
  ret void
}

; Helper for @test_dest_captured_before_global: its observable behaviour
; depends on pointer identity. It compares %p against the address of @g and
; stores i8 1 through %p when they are equal, i8 2 otherwise.
; The assertions expect this function to come out of the pass unchanged.
define void @icmp_g(ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@icmp_g
; CHECK-SAME: (ptr [[P:%.*]]) {
; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[P]], @g
; CHECK-NEXT:    br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK:       if:
; CHECK-NEXT:    store i8 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
; CHECK:       else:
; CHECK-NEXT:    store i8 2, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
  %c = icmp eq ptr %p, @g
  br i1 %c, label %if, label %else

if:
  store i8 1, ptr %p
  ret void

else:
  store i8 2, ptr %p
  ret void
}