; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
; Global pointer slot, initialized to null. The tests below load from it to
; obtain a pointer value to compare against a fresh allocation.
@gp = global ptr null, align 8
; Allocator used by every test. The return value is noalias, and the attributes
; describe an allocation function returning uninitialized memory whose size is
; given by argument 0.
declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)
define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT: ret i1 false
;
; %m is a malloc result whose only use is the compare; it is tested for
; equality against a pointer loaded from @gp. The expected output is a bare
; `ret i1 false`: the compare is folded and neither the malloc nor the load
; remains.
%m = call ptr @malloc(i64 4)
%lgp = load ptr, ptr @gp, align 8
%cmp = icmp eq ptr %m, %lgp
ret i1 %cmp
}
define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT: ret i1 true
;
; Inequality counterpart of the `eq` test: the same malloc result and load
; from @gp, compared with `ne`. The expected output is a bare `ret i1 true`.
%m = call ptr @malloc(i64 4)
%lgp = load ptr, ptr @gp, align 8
%cmp = icmp ne ptr %m, %lgp
ret i1 %cmp
}
; Although %m is marked nocapture in the deopt operand in the call to function f,
; we cannot remove the allocation site (the call to malloc).
; The comparison should fold to false irrespective of whether the call to malloc can be elided or not.
declare void @f() ; external callee with no body in this file
define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT: ret i1 false
;
; The load from @gp carries !nonnull, and the loaded pointer is the left-hand
; operand of the `eq` compare. %m is also used as a "deopt" operand-bundle
; value on the call to @f, so the expected output keeps the malloc and the
; call while the returned value becomes the constant false.
%m = call ptr @malloc(i64 24)
%lgp = load ptr, ptr @gp, align 8, !nonnull !0
%cmp = icmp eq ptr %lgp, %m
tail call void @f() [ "deopt"(ptr %m) ]
ret i1 %cmp
}
; Same function as above with deopt operand in function f, but comparison is NE
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT: ret i1 true
;
; The !nonnull load from @gp is compared with `ne` against %m, and %m is then
; used only as a "deopt" bundle operand on the call to @f. The expected output
; keeps the malloc and the call and returns the constant true.
%m = call ptr @malloc(i64 24)
%lgp = load ptr, ptr @gp, align 8, !nonnull !0
%cmp = icmp ne ptr %lgp, %m
tail call void @f() [ "deopt"(ptr %m) ]
ret i1 %cmp
}
; Same function as above, but global not marked nonnull, and we cannot fold the comparison
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT: [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[LGP]], [[M]]
; CHECK-NEXT: tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT: ret i1 [[CMP]]
;
; Negative test: the load below has no !nonnull metadata (and no explicit
; alignment; the expected output prints `align 8`). The expected output keeps
; the load and the compare unchanged.
%m = call ptr @malloc(i64 24)
%lgp = load ptr, ptr @gp
%cmp = icmp ne ptr %lgp, %m
tail call void @f() [ "deopt"(ptr %m) ]
ret i1 %cmp
}
; FIXME: The comparison should fold to false since %m escapes (call to function escape)
; after the comparison.
declare void @escape(ptr) ; external callee; its pointer parameter has no nocapture attribute
define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT: [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT: br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK: escape_call:
; CHECK-NEXT: call void @escape(ptr [[M]])
; CHECK-NEXT: ret i1 true
; CHECK: just_return:
; CHECK-NEXT: ret i1 false
;
; %m is compared against the !nonnull load from @gp, and the result selects
; the successor block. Only the taken-when-equal block passes %m to @escape.
; The expected output keeps the compare and the branch.
%m = call ptr @malloc(i64 24)
%lgp = load ptr, ptr @gp, align 8, !nonnull !0
%cmp = icmp eq ptr %m, %lgp
br i1 %cmp, label %escape_call, label %just_return
escape_call:
call void @escape(ptr %m)
ret i1 true
just_return:
; Reached only when %cmp is false; the expected output returns the constant
; false here instead of %cmp.
ret i1 %cmp
}
define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT: ret i1 false
;
; Two separate malloc results are compared for equality and have no other
; uses. The expected output is a bare `ret i1 false` with both calls removed.
%m = call ptr @malloc(i64 4)
%n = call ptr @malloc(i64 4)
%cmp = icmp eq ptr %m, %n
ret i1 %cmp
}
; The compare is folded to true since the folding compare looks through bitcasts.
; The call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc.
; NOTE: the body below contains no bitcast; both icmp operands are %m directly.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT: ret i1 true
;
; Both operands of the `eq` compare are the same value %m. The expected
; output is a bare `ret i1 true` with the malloc call removed.
%m = call ptr @malloc(i64 4)
%cmp = icmp eq ptr %m, %m
ret i1 %cmp
}
; The compare is folded to true since the folding compare looks through bitcasts.
; The malloc call for %m cannot be elided since it is used in the call to function f.
; NOTE: the body below contains no bitcast; both icmp operands are %m directly.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT: ret i1 true
;
; %m is compared with itself and is additionally used as a "deopt" bundle
; operand on the call to @f. The expected output returns the constant true
; and keeps the malloc because the call to @f still uses it.
%m = call ptr @malloc(i64 4)
%cmp = icmp eq ptr %m, %m
call void @f() [ "deopt"(ptr %m) ]
ret i1 %cmp
}
; Technically, we can fold the %cmp2 comparison, even though %m escapes through
; the ret statement since `ret` terminates the function and we cannot reach from
; the ret to cmp.
; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
; cross-threading data dependencies since we do not make the distinction between
; atomic and non-atomic loads in capture tracking.
define ptr @compare_ret_escape(ptr %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[N]], [[C:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK: retst:
; CHECK-NEXT: ret ptr [[M]]
; CHECK: chk:
; CHECK-NEXT: [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT: br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK: chk2:
; CHECK-NEXT: ret ptr [[N]]
;
; Two allocations are made. %n is compared against the incoming argument %c,
; and %m is compared against the !nonnull load from @gp. Each pointer is
; returned on one of the exit paths. The expected output keeps both compares
; and both branches as written.
%m = call ptr @malloc(i64 4)
%n = call ptr @malloc(i64 4)
%cmp = icmp eq ptr %n, %c
br i1 %cmp, label %retst, label %chk
retst:
; Reached from either compare being true; returns %m to the caller.
ret ptr %m
chk:
%lgp = load ptr, ptr @gp, align 8, !nonnull !0
%cmp2 = icmp eq ptr %m, %lgp
br i1 %cmp2, label %retst, label %chk2
chk2:
; Reached when both compares are false; returns %n to the caller.
ret ptr %n
}
; The malloc call for %m cannot be elided since it is used in the call to function f.
; However, the cmp can be folded to true as %n doesn't escape and %m, %n are distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT: ret i1 true
;
; %m is used as a "deopt" bundle operand on the call to @f; %n is used only
; by the `ne` compare. The expected output returns the constant true, keeps
; the malloc for %m and the call, and no longer contains the malloc for %n.
%m = call ptr @malloc(i64 4)
%n = call ptr @malloc(i64 4)
tail call void @f() [ "deopt"(ptr %m) ]
%cmp = icmp ne ptr %m, %n
ret i1 %cmp
}
; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we can't simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address capturing compares the same. This is
; the same point that applies to allocas, applied to noalias/malloc.
; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
; External helper with no body in this file: takes no arguments, returns a pointer.
declare ptr @hidden_inttoptr()
; External helper with no body in this file: takes a pointer, returns a pointer.
declare ptr @hidden_offset(ptr %other)
; FIXME: Missed opportunity
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT: ret i1 [[CMP]]
;
; %m is compared against a pointer formed from the integer constant 2048, and
; this is the only compare in the function. The expected output keeps the
; compare, with the inttoptr written as a constant expression operand.
%m = call ptr @malloc(i64 4)
%rhs = inttoptr i64 2048 to ptr
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT: ret i1 false
;
; %rhs points 4 bytes past the start of the 4-byte allocation %n and is
; compared for equality with a second allocation %m. This is the only compare
; in the function. The expected output is a bare `ret i1 false` with both
; malloc calls and the getelementptr removed.
%m = call ptr @malloc(i64 4)
%n = call ptr @malloc(i64 4)
%rhs = getelementptr i8, ptr %n, i32 4
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
declare void @witness(i1, i1) ; external sink that receives both compare results
define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
; Negative test: %m is compared twice, once against a pointer formed from the
; constant 2048 and once against the result of the external call
; @hidden_inttoptr. Both results are passed to @witness. The expected output
; keeps both compares.
%m = call ptr @malloc(i64 4)
%rhs = inttoptr i64 2048 to ptr
%rhs2 = call ptr @hidden_inttoptr()
%cmp1 = icmp eq ptr %m, %rhs
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[RHS:%.*]] = getelementptr i8, ptr [[N]], i64 4
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_offset(ptr [[N]])
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], [[RHS]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
; Negative test: %m is compared against a visible offset from %n (4 bytes
; past its start) and against the result of the external call @hidden_offset
; applied to %n. Both results are passed to @witness. The expected output
; keeps both compares; the only visible change is that the getelementptr
; index is printed as i64.
%m = call ptr @malloc(i64 4)
%n = call ptr @malloc(i64 4)
%rhs = getelementptr i8, ptr %n, i32 4
%rhs2 = call ptr @hidden_offset(ptr %n)
%cmp1 = icmp eq ptr %m, %rhs
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT: [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
; Negative test: %m is compared against a pointer loaded from @gp and against
; the result of the external call @hidden_inttoptr. Both results are passed
; to @witness. The expected output keeps both compares.
%m = call ptr @malloc(i64 4)
%lgp = load ptr, ptr @gp, align 8
%rhs2 = call ptr @hidden_inttoptr()
%cmp1 = icmp eq ptr %m, %lgp
%cmp2 = icmp eq ptr %m, %rhs2
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
; FIXME: This appears correct, but the current implementation relies
; on visiting both cmps in the same pass. We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
; Two identical compares of %m against the same load from @gp are passed to
; @witness. The expected output replaces both with the constant false and
; contains neither the malloc nor the load.
%m = call ptr @malloc(i64 4)
%lgp = load ptr, ptr @gp, align 8
%cmp1 = icmp eq ptr %m, %lgp
%cmp2 = icmp eq ptr %m, %lgp
call void @witness(i1 %cmp1, i1 %cmp2)
ret void
}
declare void @unknown(ptr) ; external callee; nocapture appears only at some call sites, not here
; Points out that a nocapture call can't cause a consistent result issue
; as it is (by assumption) not able to contain a comparison which might
; capture the address.
define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT: ret i1 [[CMP]]
;
; %m is passed to @unknown with a call-site nocapture attribute and then
; compared against a pointer formed from the constant 2048. The expected
; output keeps the compare.
%m = call ptr @malloc(i64 4)
call void @unknown(ptr nocapture %m)
%rhs = inttoptr i64 2048 to ptr
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
; %m is passed to @unknown with a call-site nocapture attribute and then
; compared against a pointer 4 bytes past the start of a second allocation
; %n. The expected output returns the constant false, keeps the malloc for %m
; and the call, and no longer contains the malloc for %n or the getelementptr.
%m = call ptr @malloc(i64 4)
call void @unknown(ptr nocapture %m)
%n = call ptr @malloc(i64 4)
%rhs = getelementptr i8, ptr %n, i32 4
%cmp = icmp eq ptr %m, %rhs
ret i1 %cmp
}
define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT: call void @unknown(ptr nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
; %m is passed to @unknown with a call-site nocapture attribute and then
; compared against a !nonnull pointer loaded from @gp. The expected output
; returns the constant false and no longer contains the load.
%m = call ptr @malloc(i64 4)
call void @unknown(ptr nocapture %m)
%lgp = load ptr, ptr @gp, align 8, !nonnull !0
%cmp = icmp eq ptr %m, %lgp
ret i1 %cmp
}
; End consistent heap layout tests
; We can fold this by assuming a single heap layout
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT: ret i1 false
;
; Both malloc calls carry a nonnull return attribute and their results are
; used only by the `eq` compare. The expected output is a bare
; `ret i1 false` with both calls removed.
%m = call nonnull ptr @malloc(i64 4)
%n = call nonnull ptr @malloc(i64 4)
%cmp = icmp eq ptr %m, %n
ret i1 %cmp
}
; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT: call void @unknown(ptr nonnull [[N]])
; CHECK-NEXT: ret i1 false
;
; %n is passed to @unknown with no nocapture attribute; %m is used only by
; the compare. The expected output returns the constant false, drops the
; malloc for %m, and keeps the malloc for %n together with the call (whose
; argument is printed with a nonnull attribute).
%m = call nonnull ptr @malloc(i64 4)
%n = call nonnull ptr @malloc(i64 4)
call void @unknown(ptr %n)
%cmp = icmp eq ptr %m, %n
ret i1 %cmp
}
; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT: [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[N]], i64 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
; The compare operands are interior pointers: byte offset 1 into %m and byte
; offset 2 into %n, each a 4-byte nonnull allocation. The expected output
; keeps the compare; the getelementptrs are printed with `inbounds` and i64
; indices.
%m = call nonnull ptr @malloc(i64 4)
%n = call nonnull ptr @malloc(i64 4)
%gep1 = getelementptr i8, ptr %m, i32 1
%gep2 = getelementptr i8, ptr %n, i32 2
%cmp = icmp eq ptr %gep1, %gep2
ret i1 %cmp
}
; Empty metadata node referenced by the !nonnull annotations on the loads above.
!0 = !{}