; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64-- -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s --check-prefix=CHECK-PTX
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-- -march=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %}
%struct.S1 = type { i32, i8, i64 }
%struct.S2 = type { i64, i64 }

@__const.bar.s1 = private unnamed_addr constant %struct.S1 { i32 1, i8 1, i64 1 }, align 8
@__const.qux.s = private unnamed_addr constant %struct.S2 { i64 1, i64 1 }, align 8
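
; Sums three i32, one i64, and two double varargs. 4-byte types advance the
; va_list cursor by 4; 8-byte types are first aligned up via llvm.ptrmask.
; A rough C equivalent (illustrative, not taken from the original source):
;   int variadics1(int first, ...) {
;     va_list ap; va_start(ap, first);
;     int r = first + va_arg(ap, int) + va_arg(ap, int) + va_arg(ap, int);
;     r += va_arg(ap, long long);
;     r += va_arg(ap, double);
;     r += va_arg(ap, double);
;     va_end(ap);
;     return r;
;   }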
define dso_local i32 @variadics1(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics1(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .reg .b32 %r<11>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-NEXT: .reg .f64 %fd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics1_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics1_param_1];
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd1];
; CHECK-PTX-NEXT: add.s32 %r3, %r1, %r2;
; CHECK-PTX-NEXT: ld.u32 %r4, [%rd1+4];
; CHECK-PTX-NEXT: add.s32 %r5, %r3, %r4;
; CHECK-PTX-NEXT: ld.u32 %r6, [%rd1+8];
; CHECK-PTX-NEXT: add.s32 %r7, %r5, %r6;
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 19;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r7;
; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
; CHECK-PTX-NEXT: cvt.u32.u64 %r8, %rd6;
; CHECK-PTX-NEXT: add.s64 %rd7, %rd3, 15;
; CHECK-PTX-NEXT: and.b64 %rd8, %rd7, -8;
; CHECK-PTX-NEXT: ld.f64 %fd1, [%rd8];
; CHECK-PTX-NEXT: cvt.rn.f64.s32 %fd2, %r8;
; CHECK-PTX-NEXT: add.rn.f64 %fd3, %fd2, %fd1;
; CHECK-PTX-NEXT: cvt.rzi.s32.f64 %r9, %fd3;
; CHECK-PTX-NEXT: add.s64 %rd9, %rd8, 15;
; CHECK-PTX-NEXT: and.b64 %rd10, %rd9, -8;
; CHECK-PTX-NEXT: ld.f64 %fd4, [%rd10];
; CHECK-PTX-NEXT: cvt.rn.f64.s32 %fd5, %r9;
; CHECK-PTX-NEXT: add.rn.f64 %fd6, %fd5, %fd4;
; CHECK-PTX-NEXT: cvt.rzi.s32.f64 %r10, %fd6;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r10;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
call void @llvm.va_start.p0(ptr %vlist)
%argp.cur = load ptr, ptr %vlist, align 8
%argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 4
store ptr %argp.next, ptr %vlist, align 8
%0 = load i32, ptr %argp.cur, align 4
%add = add nsw i32 %first, %0
%argp.cur1 = load ptr, ptr %vlist, align 8
%argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i64 4
store ptr %argp.next2, ptr %vlist, align 8
%1 = load i32, ptr %argp.cur1, align 4
%add3 = add nsw i32 %add, %1
%argp.cur4 = load ptr, ptr %vlist, align 8
%argp.next5 = getelementptr inbounds i8, ptr %argp.cur4, i64 4
store ptr %argp.next5, ptr %vlist, align 8
%2 = load i32, ptr %argp.cur4, align 4
%add6 = add nsw i32 %add3, %2
%argp.cur7 = load ptr, ptr %vlist, align 8
%3 = getelementptr inbounds i8, ptr %argp.cur7, i32 7
%argp.cur7.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %3, i64 -8)
%argp.next8 = getelementptr inbounds i8, ptr %argp.cur7.aligned, i64 8
store ptr %argp.next8, ptr %vlist, align 8
%4 = load i64, ptr %argp.cur7.aligned, align 8
%conv = sext i32 %add6 to i64
%add9 = add nsw i64 %conv, %4
%conv10 = trunc i64 %add9 to i32
%argp.cur11 = load ptr, ptr %vlist, align 8
%5 = getelementptr inbounds i8, ptr %argp.cur11, i32 7
%argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %5, i64 -8)
%argp.next12 = getelementptr inbounds i8, ptr %argp.cur11.aligned, i64 8
store ptr %argp.next12, ptr %vlist, align 8
%6 = load double, ptr %argp.cur11.aligned, align 8
%conv13 = sitofp i32 %conv10 to double
%add14 = fadd double %conv13, %6
%conv15 = fptosi double %add14 to i32
%argp.cur16 = load ptr, ptr %vlist, align 8
%7 = getelementptr inbounds i8, ptr %argp.cur16, i32 7
%argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %7, i64 -8)
%argp.next17 = getelementptr inbounds i8, ptr %argp.cur16.aligned, i64 8
store ptr %argp.next17, ptr %vlist, align 8
%8 = load double, ptr %argp.cur16.aligned, align 8
%conv18 = sitofp i32 %conv15 to double
%add19 = fadd double %conv18, %8
%conv20 = fptosi double %add19 to i32
call void @llvm.va_end.p0(ptr %vlist)
ret i32 %conv20
}

declare void @llvm.va_start.p0(ptr)
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
declare void @llvm.va_end.p0(ptr)
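
; foo stores the promoted varargs into a stack buffer (__local_depot) and
; passes a pointer to it as the implicit vararg parameter. Roughly:
;   int foo() { return variadics1(1, (char)1, (short)1, 1, 1LL, 1.0f, 1.0); }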
define dso_local i32 @foo() {
; CHECK-PTX-LABEL: foo(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot1[40];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot1;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, 4294967297;
; CHECK-PTX-NEXT: st.u64 [%SP+0], %rd1;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: mov.u64 %rd2, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd2;
; CHECK-PTX-NEXT: mov.u64 %rd3, 4607182418800017408;
; CHECK-PTX-NEXT: st.u64 [%SP+24], %rd3;
; CHECK-PTX-NEXT: st.u64 [%SP+32], %rd3;
; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd4;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics1,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0];
; CHECK-PTX-NEXT: } // callseq 0
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2;
; CHECK-PTX-NEXT: ret;
entry:
%conv = sext i8 1 to i32
%conv1 = sext i16 1 to i32
%conv2 = fpext float 1.000000e+00 to double
%call = call i32 (i32, ...) @variadics1(i32 noundef 1, i32 noundef %conv, i32 noundef %conv1, i32 noundef 1, i64 noundef 1, double noundef %conv2, double noundef 1.000000e+00)
ret i32 %call
}
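
; Consumes a struct S1 { i32, i8, i64 } from the va_list: the slot is aligned
; up to 8 bytes and the SROA'd pieces are loaded individually. Roughly:
;   struct S1 { int a; char b; long long c; };
;   int variadics2(int first, ...) {
;     va_list ap; va_start(ap, first);
;     struct S1 s = va_arg(ap, struct S1);
;     va_end(ap);
;     return (int)(first + s.a + s.b + s.c);
;   }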
define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
; CHECK-PTX-NEXT: or.b64 %rd4, %rd3, 4;
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4];
; CHECK-PTX-NEXT: or.b64 %rd5, %rd3, 5;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd3, 7;
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd6];
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5];
; CHECK-PTX-NEXT: or.b64 %rd7, %rd3, 6;
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7];
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT: st.u16 [%SP+0], %rs5;
; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5;
; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r6;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
%s1.sroa.3 = alloca [3 x i8], align 1
call void @llvm.va_start.p0(ptr %vlist)
%argp.cur = load ptr, ptr %vlist, align 8
%0 = getelementptr inbounds i8, ptr %argp.cur, i32 7
%argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8)
%argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16
store ptr %argp.next, ptr %vlist, align 8
%s1.sroa.0.0.copyload = load i32, ptr %argp.cur.aligned, align 8
%s1.sroa.2.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 4
%s1.sroa.2.0.copyload = load i8, ptr %s1.sroa.2.0.argp.cur.aligned.sroa_idx, align 4
%s1.sroa.3.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 5
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 %s1.sroa.3.0.argp.cur.aligned.sroa_idx, i64 3, i1 false)
%s1.sroa.31.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8
%s1.sroa.31.0.copyload = load i64, ptr %s1.sroa.31.0.argp.cur.aligned.sroa_idx, align 8
%add = add nsw i32 %first, %s1.sroa.0.0.copyload
%conv = sext i8 %s1.sroa.2.0.copyload to i32
%add1 = add nsw i32 %add, %conv
%conv2 = sext i32 %add1 to i64
%add3 = add nsw i64 %conv2, %s1.sroa.31.0.copyload
%conv4 = trunc i64 %add3 to i32
call void @llvm.va_end.p0(ptr %vlist)
ret i32 %conv4
}

declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
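
; bar passes an S1 initialized from @__const.bar.s1 to variadics2; the struct
; is split into its scalar pieces at the call site. Roughly:
;   int bar() { struct S1 s1 = {1, 1, 1}; return variadics2(1, s1); }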
define dso_local i32 @bar() {
; CHECK-PTX-LABEL: bar(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
; CHECK-PTX-NEXT: st.u16 [%SP+0], %rs8;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4;
; CHECK-PTX-NEXT: mov.u16 %rs9, 1;
; CHECK-PTX-NEXT: st.u8 [%rd6], %rs9;
; CHECK-PTX-NEXT: mov.u64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd5;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0];
; CHECK-PTX-NEXT: } // callseq 1
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2;
; CHECK-PTX-NEXT: ret;
entry:
%s1.sroa.3 = alloca [3 x i8], align 1
%s1.sroa.0.0.copyload = load i32, ptr @__const.bar.s1, align 8
%s1.sroa.2.0.copyload = load i8, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 4), align 4
call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 getelementptr inbounds (i8, ptr @__const.bar.s1, i64 5), i64 3, i1 false)
%s1.sroa.31.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 8), align 8
%call = call i32 (i32, ...) @variadics2(i32 noundef 1, i32 %s1.sroa.0.0.copyload, i8 %s1.sroa.2.0.copyload, i64 %s1.sroa.31.0.copyload)
ret i32 %call
}
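
; A <4 x i32> vararg forces the cursor up to 16-byte alignment before the
; vector load. Roughly (using a CUDA-style int4 for illustration):
;   int variadics3(int first, ...) {
;     va_list ap; va_start(ap, first);
;     int4 v = va_arg(ap, int4);
;     va_end(ap);
;     return v.x + v.y + v.z + v.w;
;   }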
define dso_local i32 @variadics3(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics3(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .reg .b32 %r<8>;
; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics3_param_1];
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 15;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -16;
; CHECK-PTX-NEXT: ld.v4.u32 {%r1, %r2, %r3, %r4}, [%rd3];
; CHECK-PTX-NEXT: add.s32 %r5, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r6, %r5, %r3;
; CHECK-PTX-NEXT: add.s32 %r7, %r6, %r4;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r7;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
call void @llvm.va_start.p0(ptr %vlist)
%argp.cur = load ptr, ptr %vlist, align 8
%0 = getelementptr inbounds i8, ptr %argp.cur, i32 15
%argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -16)
%argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16
store ptr %argp.next, ptr %vlist, align 8
%1 = load <4 x i32>, ptr %argp.cur.aligned, align 16
call void @llvm.va_end.p0(ptr %vlist)
%2 = extractelement <4 x i32> %1, i64 0
%3 = extractelement <4 x i32> %1, i64 1
%add = add nsw i32 %2, %3
%4 = extractelement <4 x i32> %1, i64 2
%add1 = add nsw i32 %add, %4
%5 = extractelement <4 x i32> %1, i64 3
%add2 = add nsw i32 %add1, %5
ret i32 %add2
}
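
; baz stores a <4 x i32> splat of 1 to a 16-byte-aligned stack slot and passes
; its address. Roughly: int baz() { return variadics3(1, make_int4(1,1,1,1)); }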
define dso_local i32 @baz() {
; CHECK-PTX-LABEL: baz(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot5[16];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot5;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.v4.u32 [%SP+0], {%r1, %r1, %r1, %r1};
; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0+0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics3,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: ld.param.b32 %r2, [retval0+0];
; CHECK-PTX-NEXT: } // callseq 2
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r2;
; CHECK-PTX-NEXT: ret;
entry:
%call = call i32 (i32, ...) @variadics3(i32 noundef 1, <4 x i32> noundef <i32 1, i32 1, i32 1, i32 1>)
ret i32 %call
}
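
; Mixes a fixed byval(%struct.S2) parameter with an i64 vararg. Roughly:
;   struct S2 { long long x; long long y; };
;   int variadics4(struct S2 first, ...) {
;     va_list ap; va_start(ap, first);
;     long long z = va_arg(ap, long long);
;     va_end(ap);
;     return (int)(first.x + first.y + z);
;   }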
define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, ...) {
; CHECK-PTX-LABEL: variadics4(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics4_param_1];
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT: ld.param.u64 %rd5, [variadics4_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd6, [variadics4_param_0+8];
; CHECK-PTX-NEXT: add.s64 %rd7, %rd5, %rd6;
; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd4;
; CHECK-PTX-NEXT: cvt.u32.u64 %r1, %rd8;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0+0], %r1;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
call void @llvm.va_start.p0(ptr %vlist)
%argp.cur = load ptr, ptr %vlist, align 8
%0 = getelementptr inbounds i8, ptr %argp.cur, i32 7
%argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8)
%argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8
store ptr %argp.next, ptr %vlist, align 8
%1 = load i64, ptr %argp.cur.aligned, align 8
%x1 = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 0
%2 = load i64, ptr %x1, align 8
%y = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 1
%3 = load i64, ptr %y, align 8
%add = add nsw i64 %2, %3
%add2 = add nsw i64 %add, %1
%conv = trunc i64 %add2 to i32
call void @llvm.va_end.p0(ptr %vlist)
ret i32 %conv
}
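
; qux copies @__const.qux.s into a local S2 and calls variadics4, discarding
; the result. Roughly: void qux() { struct S2 s = {1, 1}; variadics4(s, 1LL); }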
define dso_local void @qux() {
; CHECK-PTX-LABEL: qux(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.u64 [%SP+0], %rd1;
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
; CHECK-PTX-NEXT: mov.u64 %rd5, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: st.param.b64 [param0+0], %rd1;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1+0], %rd6;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics4,
; CHECK-PTX-NEXT: (
; CHECK-PTX-NEXT: param0,
; CHECK-PTX-NEXT: param1
; CHECK-PTX-NEXT: );
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0+0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
entry:
%s = alloca %struct.S2, align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 @__const.qux.s, i64 16, i1 false)
%call = call i32 (ptr, ...) @variadics4(ptr noundef byval(%struct.S2) align 8 %s, i64 noundef 1)
ret void
}