;; This test ensures that the logic which assigns calls to stack nodes
;; correctly handles an inlined callsite with stack ids that partially
;; overlap with a trimmed context. In particular when it also partially
;; overlaps with a longer non-trimmed context that doesn't match all of
;; the inlined callsite stack ids.
;; The profile data and call stacks were all manually added, but the code
;; would be structured something like the following (fairly contrived to
;; result in the type of control flow needed to test):
;; void A(bool b) {
;; if (b)
;; // cold: stack ids 6, 2, 8 (trimmed ids 9, 11)
;; // not cold: stack ids 6, 7 (trimmed ids 10)
;; new char[10]; // stack id 6
;; else
;; // not cold: stack ids 1, 2, 8, 3, 4
;; // cold: stack ids 1, 2, 8, 3, 5
;; new char[10]; // stack id 1
;; }
;;
;; void XZ() {
;; A(false); // stack ids 2, 8 (e.g. X inlined into Z)
;; }
;;
;; void XZN() {
;; // This is the tricky one to get right. We want to ensure it gets
;; // correctly correlated with a stack node for the trimmed 6, 2, 8
;; // context shown in A. It should *not* be correlated with the longer
;; // untrimmed 1, 2, 8, 3, 4|5 contexts.
;; A(true); // stack ids 2, 8, 9 (e.g. X inlined into Z inlined into N)
;; }
;;
;; void Y() {
;; A(true); // stack id 7
;; }
;;
;; void M() {
;; XZ(); // stack id 3
;; }
;;
;; int main() {
;; M(); // stack id 4 (leads to not cold allocation)
;; M(); // stack id 5 (leads to cold allocation)
;; XZN(); // stack id 11 (leads to cold allocation)
;; Y(); // stack id 10 (leads to not cold allocation)
;; }
;; -stats requires asserts
; REQUIRES: asserts
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN: --check-prefix=STATS --check-prefix=REMARKS
; REMARKS: created clone _Z1Ab.memprof.1
; REMARKS: created clone _Z2XZv.memprof.1
; REMARKS: created clone _Z1Mv.memprof.1
;; Make sure the inlined context in _Z3XZNv, which partially overlaps the
;; trimmed cold context, and also partially overlaps completely
;; unrelated contexts, correctly calls a cloned version of _Z1Ab,
;; which will call the cold annotated allocation.
; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z1Mv.memprof.1
; REMARKS: call in clone _Z1Mv.memprof.1 assigned to call function clone _Z2XZv.memprof.1
; REMARKS: call in clone _Z2XZv.memprof.1 assigned to call function clone _Z1Ab
; REMARKS: call in clone main assigned to call function clone _Z1Mv
; REMARKS: call in clone _Z1Mv assigned to call function clone _Z2XZv
; REMARKS: call in clone _Z2XZv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Yv assigned to call function clone _Z1Ab
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute notcold
;; Module-level target configuration: the test module is built for
;; x86-64 Linux (see the triple below).
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; void A(bool b): holds the two profiled allocation sites, selected by %b.
;; Each site carries its own !memprof profile with one cold and one notcold
;; context, so the checks in this file expect the pass to create a
;; _Z1Ab.memprof.1 clone and annotate the two copies of each site differently.
define dso_local void @_Z1Ab(i1 noundef zeroext %b) {
entry:
br i1 %b, label %if.then, label %if.else
if.then:
;; b == true: allocation with stack id 6 (!callsite !11). Its profile (!5)
;; holds the two trimmed contexts: cold {6, 2, 8} and notcold {6, 7}.
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !5, !callsite !11
br label %if.end
if.else:
;; b == false: allocation with stack id 1 (!callsite !10). Its profile (!0)
;; holds the two untrimmed contexts {1, 2, 8, 3, 4} (notcold) and
;; {1, 2, 8, 3, 5} (cold).
%call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !10
br label %if.end
if.end:
ret void
}
;; Allocator called from both allocation sites in _Z1Ab (the `new char[10]`
;; in the source sketch at the top of the file). The declaration itself is
;; nobuiltin (#0); the call sites opt back in through attribute group #7.
; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #0
;; void XZ(): calls A(false) from a callsite with stack ids 2, 8 (!12), which
;; models X inlined into Z. This callsite lies on the untrimmed contexts
;; {1, 2, 8, 3, 4|5} of the b == false allocation. The checks in this file
;; expect a _Z2XZv.memprof.1 clone so the cold and notcold callers can be
;; routed to different copies of _Z1Ab.
define dso_local void @_Z2XZv() local_unnamed_addr #0 {
entry:
tail call void @_Z1Ab(i1 noundef zeroext false), !callsite !12
ret void
}
;; void M(): calls XZ() from stack id 3 (!19). main calls M twice, once on
;; the notcold context (stack id 4) and once on the cold context (stack id 5),
;; so the checks in this file expect a _Z1Mv.memprof.1 clone.
define dso_local void @_Z1Mv() local_unnamed_addr #0 {
entry:
tail call void @_Z2XZv(), !callsite !19
ret void
}
;; void XZN(): the callsite this test exists for. It calls A(true) from an
;; inlined callsite with stack ids 2, 8, 9 (!15). The leading 2, 8 overlaps
;; both the trimmed cold context {6, 2, 8} and the longer untrimmed contexts
;; {1, 2, 8, 3, 4|5}; stack id 9 appears in no profiled context. The checks in
;; this file require the call to be assigned to _Z1Ab.memprof.1, the copy
;; whose b == true allocation is annotated cold.
define dso_local void @_Z3XZNv() local_unnamed_addr {
entry:
tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !15
ret void
}
;; void Y(): calls A(true) from stack id 7 (!17), which matches the trimmed
;; notcold context {6, 7}. The checks in this file expect it to keep calling
;; the original _Z1Ab.
define dso_local void @_Z1Yv() local_unnamed_addr {
entry:
tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !17
ret void
}
;; int main(): root of all four profiled contexts. Each call below has its
;; own stack id: 4 and 5 for the two M() calls, 11 for XZN() and 10 for Y().
;; Stack ids 11 and 10 do not appear in any !memprof context because the
;; contexts of the b == true allocation are trimmed before reaching main.
define dso_local noundef i32 @main() local_unnamed_addr {
entry:
tail call void @_Z1Mv(), !callsite !13 ;; Not cold context
tail call void @_Z1Mv(), !callsite !14 ;; Cold context
tail call void @_Z3XZNv(), !callsite !16 ;; Cold context
tail call void @_Z1Yv(), !callsite !18 ;; Not cold context
ret i32 0
}
;; #0 (nobuiltin) is referenced by the _Znam declaration and by the
;; definitions of _Z2XZv and _Z1Mv.
;; #7 (builtin) is referenced by the two _Znam call sites in _Z1Ab. The
;; output attribute groups checked at the end of this file are this set plus
;; a "memprof" string attribute ("cold" or "notcold").
attributes #0 = { nobuiltin }
attributes #7 = { builtin }
;; !memprof profile of the b == false allocation (stack id 1): two untrimmed
;; contexts that differ only in their last stack id (4 vs 5).
!0 = !{!1, !3}
;; Not cold context via first call to _Z1Mv in main
!1 = !{!2, !"notcold"}
!2 = !{i64 1, i64 2, i64 8, i64 3, i64 4}
;; Cold context via second call to _Z1Mv in main
!3 = !{!4, !"cold"}
!4 = !{i64 1, i64 2, i64 8, i64 3, i64 5}
;; !memprof profile of the b == true allocation (stack id 6): two trimmed
;; contexts, neither of which extends to the callsites in main.
!5 = !{!6, !8}
;; Cold (trimmed) context via call to _Z3XZNv in main
!6 = !{!7, !"cold"}
!7 = !{i64 6, i64 2, i64 8}
;; Not cold (trimmed) context via call to _Z1Yv in main
!8 = !{!9, !"notcold"}
!9 = !{i64 6, i64 7}
;; !callsite metadata. !10 and !11 are the two allocation sites in _Z1Ab.
!10 = !{i64 1}
!11 = !{i64 6}
;; Inlined callsite in _Z2XZv
!12 = !{i64 2, i64 8}
;; The two calls to _Z1Mv in main (not cold, then cold)
!13 = !{i64 4}
!14 = !{i64 5}
;; Inlined context in _Z3XZNv, which includes part of trimmed cold context
!15 = !{i64 2, i64 8, i64 9}
;; Call to _Z3XZNv in main
!16 = !{i64 11}
;; Callsite in _Z1Yv
!17 = !{i64 7}
;; Call to _Z1Yv in main
!18 = !{i64 10}
;; Callsite in _Z1Mv
!19 = !{i64 3}
; IR: define {{.*}} @_Z1Ab(i1 noundef zeroext %b)
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: define {{.*}} @_Z2XZv()
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv()
; IR: call {{.*}} @_Z2XZv()
;; Make sure the inlined context in _Z3XZNv, which partially overlaps the
;; trimmed cold context, and also partially overlaps completely
;; unrelated contexts, correctly calls the cloned version of _Z1Ab
;; that will call the cold annotated allocation.
; IR: define {{.*}} @_Z3XZNv()
; IR: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext true)
; IR: define {{.*}} @_Z1Yv()
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext true)
; IR: define {{.*}} @main()
; IR: call {{.*}} @_Z1Mv()
; IR: call {{.*}} @_Z1Mv.memprof.1()
; IR: call {{.*}} @_Z3XZNv()
; IR: call {{.*}} @_Z1Yv()
; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b)
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define {{.*}} @_Z2XZv.memprof.1()
; IR: call {{.*}} @_Z1Ab(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv.memprof.1()
; IR: call {{.*}} @_Z2XZv.memprof.1()
; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis