; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes OPT
; RUN: llc < %s -mcpu=sm_70 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX
; Kernel whose byval i32 parameter %input1 is marked grid_constant (see the
; !nvvm.annotations entries at the end of this file) and is only read.
; The IR checks expect the load to go through an addrspace(101) cast of the
; parameter with no alloca ahead of it; the PTX checks expect an ld.param from
; param_0 with no ld.u32 ahead of it.
define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
; PTX-LABEL: grid_const_int(
; PTX-NOT: ld.u32
; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_int_param_0];
;
; OPT-LABEL: define void @grid_const_int(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) {
; OPT-NOT: alloca
; OPT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
;
; Read-only use of the byval parameter: load it, add %input2, store to %out.
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
store i32 %add, ptr %out
ret void
}
; Two-field struct used as the byval payload by the struct-based tests below.
%struct.s = type { i32, i32 }
; Kernel whose byval %struct.s parameter is marked grid_constant and is only
; read, field by field. The IR checks expect both GEPs and both loads to be
; rewritten onto an addrspace(101) cast of the parameter with no alloca ahead
; of them; the PTX checks expect ld.param from param_0 and param_0+4.
define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
; PTX-LABEL: grid_const_struct(
; PTX: {
; PTX-NOT: ld.u32
; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_struct_param_0];
; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_struct_param_0+4];
;
; OPT-LABEL: define void @grid_const_struct(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) {
; OPT-NOT: alloca
; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
; OPT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
; OPT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
; OPT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
;
; Address both fields, load them, and store their sum to %out.
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%int1 = load i32, ptr %gep1
%int2 = load i32, ptr %gep2
%add = add i32 %int1, %int2
store i32 %add, ptr %out
ret void
}
; Kernel whose grid_constant byval parameter escapes as a call argument.
; The IR checks expect no alloca of the struct; instead the parameter is cast
; to addrspace(101) and converted with llvm.nvvm.ptr.param.to.gen before being
; passed to @escape. The PTX checks expect a cvta.param and no .local ahead of it.
define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
; PTX-NOT: .local
; PTX: cvta.param.{{.*}}
; OPT-LABEL: define void @grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) {
; OPT-NOT: alloca [[STRUCT_S]]
; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
;
; The only use of %input is as an argument to an external call.
%call = call i32 @escape(ptr %input)
ret void
}
; Kernel with two grid_constant byval parameters (%input and %b; the metadata
; for this kernel lists parameters 1 and 3) plus a plain i32 %a that is spilled
; to a local alloca. All three addresses are passed to @escape3.
; The IR checks expect both byval parameters to be cast to addrspace(101) and
; converted with llvm.nvvm.ptr.param.to.gen, the i32 alloca to remain, and no
; alloca of %struct.s. The PTX checks expect a cvta.param for each of the two
; parameter addresses feeding the outgoing call parameters.
define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
; PTX-LABEL: multiple_grid_const_escape(
; PTX: mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0;
; PTX: mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2;
; PTX: mov.{{.*}} [[RD3:%.*]], [[RD2]];
; PTX: mov.{{.*}} [[RD4:%.*]], [[RD1]];
; PTX: cvta.param.{{.*}} [[RD5:%.*]], [[RD4]];
; PTX: cvta.param.{{.*}} [[RD6:%.*]], [[RD3]];
; PTX: {
; PTX: st.param.b64 [param0+0], [[RD5]];
; PTX: st.param.b64 [param2+0], [[RD6]];
;
; OPT-LABEL: define void @multiple_grid_const_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) {
; OPT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT-NOT: alloca %struct.s
; OPT: [[A_ADDR:%.*]] = alloca i32, align 4
; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
;
; %a is a by-value scalar, so it gets an explicit stack slot before escaping.
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
ret void
}
; Kernel whose grid_constant byval parameter escapes by having its address
; stored to memory. The IR checks expect no alloca of the struct and the stored
; value to be the llvm.nvvm.ptr.param.to.gen result; the PTX checks expect a
; cvta.param feeding an st.global.u64, with no .local ahead of them.
define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
; PTX-LABEL: grid_const_memory_escape(
; PTX-NOT: .local
; PTX: mov.b64 [[RD1:%.*]], grid_const_memory_escape_param_0;
; PTX: cvta.param.u64 [[RD3:%.*]], [[RD2:%.*]];
; PTX: st.global.u64 [[[RD4:%.*]]], [[RD3]];
;
; OPT-LABEL: define void @grid_const_memory_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) {
; OPT-NOT: alloca [[STRUCT_S]]
; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT: store ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, align 8
;
; The pointer itself (not the pointee) is written to %addr.
store ptr %input, ptr %addr, align 8
ret void
}
; Kernel whose grid_constant byval parameter escapes through inline asm: the
; addresses of both struct fields are passed as operands to an "add.s64".
; The IR checks expect no alloca of the struct, GEPs on the addrspace(101) cast
; of the parameter, and each GEP converted with llvm.nvvm.ptr.param.to.gen
; before the asm call. The PTX checks expect a cvta.param per operand feeding
; the add.s64, and no .local ahead of them.
; The negative .local check below is written with its colon so that FileCheck
; actually enforces it, matching the sibling tests in this file.
; NOTE(review): attribute group #1 used on the asm call is not defined in the
; visible part of this file - confirm it is intentional.
define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) {
; PTX-LABEL: grid_const_inlineasm_escape(
; PTX-NOT: .local
; PTX: add.{{.*}} [[RD2:%.*]], [[RD1:%.*]], 4;
; PTX: cvta.param.u64 [[RD4:%.*]], [[RD2]]
; PTX: cvta.param.u64 [[RD3:%.*]], [[RD1]]
; PTX: add.s64 [[RD5:%.*]], [[RD3]], [[RD4]];
;
; OPT-LABEL: define void @grid_const_inlineasm_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) {
; OPT-NOT: alloca [[STRUCT_S]]
; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[TMPPTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 0
; OPT: [[TMPPTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT_PARAM]], i32 0, i32 1
; OPT: [[TMPPTR22_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR22]])
; OPT: [[TMPPTR13_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[TMPPTR13]])
; OPT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
;
; Both field addresses are consumed as 64-bit register operands of the asm.
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
store i64 %1, ptr %result, align 8
ret void
}
; Kernel whose grid_constant byval i32 parameter is both read directly and
; passed to @escape. The IR checks expect no alloca, the load to use the
; addrspace(101) cast directly, and only the escaping use to go through
; llvm.nvvm.ptr.param.to.gen. The PTX checks expect an ld.param for the read
; and a cvta.param result stored to the outgoing call parameter, with no
; .local ahead of them.
define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escape(
; PTX-NOT: .local
; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escape_param_0];
; PTX: add.{{.*}}
; PTX: cvta.param.u64 [[RD3:%.*]], {{%.*}}
; PTX: st.param.{{.*}} [param0+0], [[RD3]]
; PTX: call
;
; OPT-LABEL: define void @grid_const_partial_escape(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]], ptr {{%.*}}) {
; OPT-NOT: alloca
; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT1]], align 4
; OPT: [[TWICE:%.*]] = add i32 [[VAL]], [[VAL]]
; OPT: store i32 [[TWICE]]
; OPT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT: ret void
;
; Non-escaping use: read the value, double it, store to %output.
%val = load i32, ptr %input
%twice = add i32 %val, %val
store i32 %twice, ptr %output
; Escaping use: the same pointer is handed to an external call.
%call = call i32 @escape(ptr %input)
ret void
}
; Kernel whose grid_constant byval struct parameter is read field by field,
; has its address stored to memory, and has a field address passed to @escape.
; The IR checks expect no alloca, the field loads to use addrspace(101)
; pointers, and each escaping use (the stored pointer and the call argument)
; to get its own llvm.nvvm.ptr.param.to.gen conversion. The PTX checks expect
; two ld.param reads plus a cvta.param result that is stored to global memory
; and passed as the outgoing call parameter.
define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escapemem(
; PTX: {
; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_partial_escapemem_param_0];
; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_partial_escapemem_param_0+4];
; PTX: cvta.param.{{.*}} [[RD5:%.*]], {{%.*}};
; PTX: st.global.{{.*}} [{{.*}}], [[RD5]];
; PTX: add.s32 [[R3:%.*]], [[R1]], [[R2]]
; PTX: st.param.{{.*}} [param0+0], [[RD5]]
; PTX: escape
; OPT-LABEL: define i32 @grid_const_partial_escapemem(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr {{%.*}}) {
; OPT-NOT: alloca
; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[PTR13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 0
; OPT: [[VAL1:%.*]] = load i32, ptr addrspace(101) [[PTR13]], align 4
; OPT: [[PTR22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT2]], i32 0, i32 1
; OPT: [[VAL2:%.*]] = load i32, ptr addrspace(101) [[PTR22]], align 4
; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT: store ptr [[INPUT1]]
; OPT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT: [[PTR1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[PTR13]])
; OPT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
;
; Non-escaping uses: read both fields.
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%val1 = load i32, ptr %ptr1
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%val2 = load i32, ptr %ptr2
; Escaping uses: the parameter address goes to memory, a field address to a call.
store ptr %input, ptr %output
%add = add i32 %val1, %val2
%call2 = call i32 @escape(ptr %ptr1)
ret i32 %add
}
; Kernel where two field addresses of the same grid_constant byval parameter
; are merged by a phi and then loaded from. The IR checks expect the parameter
; to be converted once with llvm.nvvm.ptr.param.to.gen, both GEPs to be based
; on that generic pointer, and the final load to go through the phi result
; rather than an addrspace(101) pointer. The PTX checks expect a cvta.param
; followed, after the branch target, by a load through the converted address.
define void @grid_const_phi_escape(ptr byval(%struct.s) align 4 %input1, ptr %inout) {
; PTX-LABEL: grid_const_phi_escape(
; PTX: cvta.param.{{.*}} [[RD1:%.*]], {{.*}}
; PTX: @[[P1:%.*]] bra $L__BB[[TARGET_LABEL:[_0-9]+]];
; PTX: $L__BB[[TARGET_LABEL]]:
; PTX: ld.{{.*}} [[R1:%.*]], [[[RD1]]];
;
; OPT-LABEL: define void @grid_const_phi_escape(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr {{%.*}}) {
; OPT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
; OPT: br i1 {{.*}}, label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0
; OPT: br label %[[MERGE:.*]]
; OPT: [[SECOND]]:
; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 1
; OPT: br label %[[MERGE]]
; OPT: [[MERGE]]:
; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
; OPT-NOT: load i32, ptr addrspace(101)
; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
;
; Pick field 0 when the value at %inout is negative, field 1 otherwise.
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
first:
%ptr1 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 0
br label %merge
second:
%ptr2 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 1
br label %merge
merge:
; The load address is only known through the phi.
%ptrnew = phi ptr [%ptr1, %first], [%ptr2, %second]
%valloaded = load i32, ptr %ptrnew
store i32 %valloaded, ptr %inout
ret void
}
; NOTE: %input2 is *not* grid_constant
; Kernel where a phi merges a field address of a grid_constant byval parameter
; (%input1) with a field address of an ordinary byval parameter (%input2).
; The IR checks expect %input2 to be copied into a local alloca (load from its
; addrspace(101) cast, then store), while %input1 is converted with
; llvm.nvvm.ptr.param.to.gen; the phi then merges a GEP on the generic pointer
; with a GEP on the alloca.
define void @grid_const_phi_escape2(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) {
; PTX-LABEL: grid_const_phi_escape2(
; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_phi_escape2_param_1+4];
; PTX: @[[P1:%.*]] bra $L__BB[[LABEL:[_0-9]+]];
; PTX: cvta.param.u64 [[RD1:%.*]], [[RD2:%.*]];
; PTX: ld.u32 [[R1]], [[[RD1]]];
; PTX: $L__BB[[LABEL]]:
; PTX: st.global.u32 [[[RD3:%.*]]], [[R1]]
; OPT-LABEL: define void @grid_const_phi_escape2(
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr {{%.*}}) {
; OPT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
; OPT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
; OPT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
; OPT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]])
; OPT: br i1 [[LESS:%.*]], label %[[FIRST:.*]], label %[[SECOND:.*]]
; OPT: [[FIRST]]:
; OPT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
; OPT: br label %[[MERGE:.*]]
; OPT: [[SECOND]]:
; OPT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT24]], i32 0, i32 1
; OPT: br label %[[MERGE]]
; OPT: [[MERGE]]:
; OPT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
;
; Pick %input1 field 0 when the value at %inout is negative, %input2 field 1
; otherwise.
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
first:
%ptr1 = getelementptr inbounds %struct.s, ptr %input1, i32 0, i32 0
br label %merge
second:
%ptr2 = getelementptr inbounds %struct.s, ptr %input2, i32 0, i32 1
br label %merge
merge:
%ptrnew = phi ptr [%ptr1, %first], [%ptr2, %second]
%valloaded = load i32, ptr %ptrnew
store i32 %valloaded, ptr %inout
ret void
}
; NOTE: %input2 is *not* grid_constant
; Kernel where a select chooses between a grid_constant byval parameter
; (%input1) and an ordinary byval parameter (%input2), and the result is
; loaded from. The IR checks expect an i32 alloca for %input2, an
; llvm.nvvm.ptr.param.to.gen conversion for %input1, and the select/load to
; operate on those two generic pointers. The PTX checks expect a selp between
; the cvta.param result and a stack (%SP-based) address, then a generic ld.u32.
define void @grid_const_select_escape(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) {
; PTX-LABEL: grid_const_select_escape(
; PTX: cvta.param.{{.*}} [[RD2:%.*]], [[RD1:%.*]]
; PTX: setp.lt.{{.*}} [[P1:%.*]], {{%.*}}, 0
; PTX: add.{{.*}} [[RD3:%.*]], %SP, 0;
; PTX: selp.{{.*}} [[RD4:%.*]], [[RD2]], [[RD3]], [[P1]];
; PTX: ld.u32 {{%.*}}, [[[RD4]]];
; OPT-LABEL: define void @grid_const_select_escape(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) {
; OPT: [[INPUT24:%.*]] = alloca i32, align 4
; OPT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
; OPT: [[INPUT11:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT12]])
; OPT: load i32, ptr [[INOUT]]
; OPT: [[PTRNEW:%.*]] = select i1 [[LESS:%.*]], ptr [[INPUT11]], ptr [[INPUT24]]
; OPT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
;
; Choose %input1 when the value at %inout is negative, %input2 otherwise.
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
%ptrnew = select i1 %less, ptr %input1, ptr %input2
%valloaded = load i32, ptr %ptrnew
store i32 %valloaded, ptr %inout
ret void
}
; Kernel whose grid_constant byval i32 parameter is both loaded from and
; converted to an integer with ptrtoint. The IR checks expect the load to use
; the addrspace(101) cast directly and the ptrtoint to operate on the
; llvm.nvvm.ptr.param.to.gen result. The PTX checks expect an ld.param for the
; value and a cvta.param feeding a cvt.u32.u64, with no .local ahead of them.
define i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-LABEL: grid_const_ptrtoint(
; PTX-NOT: .local
; PTX: ld.param.{{.*}} {{%.*}}, [grid_const_ptrtoint_param_0];
; PTX: cvta.param.u64 [[RD1:%.*]], {{%.*}}
; PTX: cvt.u32.u64 {{%.*}}, [[RD1]]
; OPT-LABEL: define i32 @grid_const_ptrtoint(
; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) {
; OPT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
; OPT: [[VAL:%.*]] = load i32, ptr addrspace(101) [[INPUT2]]
; OPT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
; OPT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
%val = load i32, ptr %input
%ptrval = ptrtoint ptr %input to i32
; Combine both results so neither the load nor the ptrtoint is dead.
%keepalive = add i32 %val, %ptrval
ret i32 %keepalive
}
; External functions used as opaque sinks so that pointers passed to them are
; treated as escaping. @dummy is not referenced by any function in the visible
; part of this file.
; NOTE(review): the call sites in this file use a function type returning i32
; (e.g. "call i32 @escape(ptr ...)"), while @escape and @escape3 are declared
; here as returning ptr - confirm whether the mismatch is intentional before
; changing either side, since the check lines match the current call lowering.
declare dso_local void @dummy() local_unnamed_addr
declare dso_local ptr @escape(ptr) local_unnamed_addr
declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr
; Kernel annotations. Every operand of !nvvm.annotations has the form
;   !{ptr @fn, !"kernel", i32 1, !"grid_constant", !N}
; where !N lists the 1-based indices of the byval parameters that are
; grid_constant (e.g. !7 marks parameters 1 and 3 of
; @multiple_grid_const_escape).
; Only the annotation nodes are listed in the named metadata: each operand must
; start with the annotated entity, so the index-list nodes (!1, !3, ...) are
; not annotations themselves and are reached through their annotation node.
!nvvm.annotations = !{!0, !2, !4, !6, !8, !10, !12, !14, !16, !18, !20, !22}
!0 = !{ptr @grid_const_int, !"kernel", i32 1, !"grid_constant", !1}
!1 = !{i32 1}
!2 = !{ptr @grid_const_struct, !"kernel", i32 1, !"grid_constant", !3}
!3 = !{i32 1}
!4 = !{ptr @grid_const_escape, !"kernel", i32 1, !"grid_constant", !5}
!5 = !{i32 1}
!6 = !{ptr @multiple_grid_const_escape, !"kernel", i32 1, !"grid_constant", !7}
!7 = !{i32 1, i32 3}
!8 = !{ptr @grid_const_memory_escape, !"kernel", i32 1, !"grid_constant", !9}
!9 = !{i32 1}
!10 = !{ptr @grid_const_inlineasm_escape, !"kernel", i32 1, !"grid_constant", !11}
!11 = !{i32 1}
!12 = !{ptr @grid_const_partial_escape, !"kernel", i32 1, !"grid_constant", !13}
!13 = !{i32 1}
!14 = !{ptr @grid_const_partial_escapemem, !"kernel", i32 1, !"grid_constant", !15}
!15 = !{i32 1}
!16 = !{ptr @grid_const_phi_escape, !"kernel", i32 1, !"grid_constant", !17}
!17 = !{i32 1}
!18 = !{ptr @grid_const_phi_escape2, !"kernel", i32 1, !"grid_constant", !19}
!19 = !{i32 1}
!20 = !{ptr @grid_const_select_escape, !"kernel", i32 1, !"grid_constant", !21}
!21 = !{i32 1}
!22 = !{ptr @grid_const_ptrtoint, !"kernel", i32 1, !"grid_constant", !23}
!23 = !{i32 1}