; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %}
;
; Check that parameters of a __global__ (kernel) function do not get increased
; alignment, and no additional vectorization is performed on loads/stores with
; that parameters.
;
; Test IR is a minimized version of IR generated with the following command
; from the source code below:
; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
;
; ----------------------------------------------------------------------------
; #include <stdint.h>
;
; struct St4x1 { uint32_t field[1]; };
; struct St4x2 { uint32_t field[2]; };
; struct St4x3 { uint32_t field[3]; };
; struct St4x4 { uint32_t field[4]; };
; struct St4x5 { uint32_t field[5]; };
; struct St4x6 { uint32_t field[6]; };
; struct St4x7 { uint32_t field[7]; };
; struct St4x8 { uint32_t field[8]; };
; struct St8x1 { uint64_t field[1]; };
; struct St8x2 { uint64_t field[2]; };
; struct St8x3 { uint64_t field[3]; };
; struct St8x4 { uint64_t field[4]; };
;
; #define DECLARE_FUNCTION(StName) \
; static __global__ __attribute__((noinline)) \
; void foo_##StName(struct StName in, struct StName* ret) { \
; const unsigned size = sizeof(ret->field) / sizeof(*ret->field); \
; for (unsigned i = 0; i != size; ++i) \
; ret->field[i] = in.field[i]; \
; } \
;
; DECLARE_FUNCTION(St4x1)
; DECLARE_FUNCTION(St4x2)
; DECLARE_FUNCTION(St4x3)
; DECLARE_FUNCTION(St4x4)
; DECLARE_FUNCTION(St4x5)
; DECLARE_FUNCTION(St4x6)
; DECLARE_FUNCTION(St4x7)
; DECLARE_FUNCTION(St4x8)
; DECLARE_FUNCTION(St8x1)
; DECLARE_FUNCTION(St8x2)
; DECLARE_FUNCTION(St8x3)
; DECLARE_FUNCTION(St8x4)
; ----------------------------------------------------------------------------
%struct.St4x1 = type { [1 x i32] }
%struct.St4x2 = type { [2 x i32] }
%struct.St4x3 = type { [3 x i32] }
%struct.St4x4 = type { [4 x i32] }
%struct.St4x5 = type { [5 x i32] }
%struct.St4x6 = type { [6 x i32] }
%struct.St4x7 = type { [7 x i32] }
%struct.St4x8 = type { [8 x i32] }
%struct.St8x1 = type { [1 x i64] }
%struct.St8x2 = type { [2 x i64] }
%struct.St8x3 = type { [3 x i64] }
%struct.St8x4 = type { [4 x i64] }
define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x1(
; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4],
; CHECK: .param .b64 foo_St4x1_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x1_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
ret void
}
define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x2(
; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8],
; CHECK: .param .b64 foo_St4x2_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x2_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [2 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
ret void
}
define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x3(
; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12],
; CHECK: .param .b64 foo_St4x3_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x3_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
ret void
}
define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x4(
; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16],
; CHECK: .param .b64 foo_St4x4_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x4_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12];
; CHECK: st.u32 [[[R1]]+12], [[R5]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
%arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
%4 = load i32, ptr %arrayidx.3, align 4
%arrayidx3.3 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 3
store i32 %4, ptr %arrayidx3.3, align 4
ret void
}
define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x5(
; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20],
; CHECK: .param .b64 foo_St4x5_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x5_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12];
; CHECK: st.u32 [[[R1]]+12], [[R5]];
; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16];
; CHECK: st.u32 [[[R1]]+16], [[R6]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
%arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3
%4 = load i32, ptr %arrayidx.3, align 4
%arrayidx3.3 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 3
store i32 %4, ptr %arrayidx3.3, align 4
%arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4
%5 = load i32, ptr %arrayidx.4, align 4
%arrayidx3.4 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 4
store i32 %5, ptr %arrayidx3.4, align 4
ret void
}
define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x6(
; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24],
; CHECK: .param .b64 foo_St4x6_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x6_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12];
; CHECK: st.u32 [[[R1]]+12], [[R5]];
; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16];
; CHECK: st.u32 [[[R1]]+16], [[R6]];
; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20];
; CHECK: st.u32 [[[R1]]+20], [[R7]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
%arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3
%4 = load i32, ptr %arrayidx.3, align 4
%arrayidx3.3 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 3
store i32 %4, ptr %arrayidx3.3, align 4
%arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4
%5 = load i32, ptr %arrayidx.4, align 4
%arrayidx3.4 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 4
store i32 %5, ptr %arrayidx3.4, align 4
%arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5
%6 = load i32, ptr %arrayidx.5, align 4
%arrayidx3.5 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 5
store i32 %6, ptr %arrayidx3.5, align 4
ret void
}
define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x7(
; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28],
; CHECK: .param .b64 foo_St4x7_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x7_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12];
; CHECK: st.u32 [[[R1]]+12], [[R5]];
; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16];
; CHECK: st.u32 [[[R1]]+16], [[R6]];
; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20];
; CHECK: st.u32 [[[R1]]+20], [[R7]];
; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24];
; CHECK: st.u32 [[[R1]]+24], [[R8]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
%arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3
%4 = load i32, ptr %arrayidx.3, align 4
%arrayidx3.3 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 3
store i32 %4, ptr %arrayidx3.3, align 4
%arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4
%5 = load i32, ptr %arrayidx.4, align 4
%arrayidx3.4 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 4
store i32 %5, ptr %arrayidx3.4, align 4
%arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5
%6 = load i32, ptr %arrayidx.5, align 4
%arrayidx3.5 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 5
store i32 %6, ptr %arrayidx3.5, align 4
%arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6
%7 = load i32, ptr %arrayidx.6, align 4
%arrayidx3.6 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 6
store i32 %7, ptr %arrayidx3.6, align 4
ret void
}
define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St4x8(
; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32],
; CHECK: .param .b64 foo_St4x8_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x8_param_1];
; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0];
; CHECK: st.u32 [[[R1]]], [[R2]];
; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4];
; CHECK: st.u32 [[[R1]]+4], [[R3]];
; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8];
; CHECK: st.u32 [[[R1]]+8], [[R4]];
; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12];
; CHECK: st.u32 [[[R1]]+12], [[R5]];
; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16];
; CHECK: st.u32 [[[R1]]+16], [[R6]];
; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20];
; CHECK: st.u32 [[[R1]]+20], [[R7]];
; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24];
; CHECK: st.u32 [[[R1]]+24], [[R8]];
; CHECK: ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28];
; CHECK: st.u32 [[[R1]]+28], [[R9]];
; CHECK: ret;
%1 = load i32, ptr %in, align 4
store i32 %1, ptr %ret, align 4
%arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
%2 = load i32, ptr %arrayidx.1, align 4
%arrayidx3.1 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 1
store i32 %2, ptr %arrayidx3.1, align 4
%arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2
%3 = load i32, ptr %arrayidx.2, align 4
%arrayidx3.2 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 2
store i32 %3, ptr %arrayidx3.2, align 4
%arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3
%4 = load i32, ptr %arrayidx.3, align 4
%arrayidx3.3 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 3
store i32 %4, ptr %arrayidx3.3, align 4
%arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4
%5 = load i32, ptr %arrayidx.4, align 4
%arrayidx3.4 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 4
store i32 %5, ptr %arrayidx3.4, align 4
%arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5
%6 = load i32, ptr %arrayidx.5, align 4
%arrayidx3.5 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 5
store i32 %6, ptr %arrayidx3.5, align 4
%arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6
%7 = load i32, ptr %arrayidx.6, align 4
%arrayidx3.6 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 6
store i32 %7, ptr %arrayidx3.6, align 4
%arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7
%8 = load i32, ptr %arrayidx.7, align 4
%arrayidx3.7 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 7
store i32 %8, ptr %arrayidx3.7, align 4
ret void
}
define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St8x1(
; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8],
; CHECK: .param .b64 foo_St8x1_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x1_param_1];
; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0];
; CHECK: st.u64 [[[R1]]], [[RD1]];
; CHECK: ret;
%1 = load i64, ptr %in, align 8
store i64 %1, ptr %ret, align 8
ret void
}
define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St8x2(
; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16],
; CHECK: .param .b64 foo_St8x2_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x2_param_1];
; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0];
; CHECK: st.u64 [[[R1]]], [[RD1]];
; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8];
; CHECK: st.u64 [[[R1]]+8], [[RD2]];
; CHECK: ret;
%1 = load i64, ptr %in, align 8
store i64 %1, ptr %ret, align 8
%arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
%2 = load i64, ptr %arrayidx.1, align 8
%arrayidx3.1 = getelementptr inbounds [2 x i64], ptr %ret, i64 0, i64 1
store i64 %2, ptr %arrayidx3.1, align 8
ret void
}
define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St8x3(
; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24],
; CHECK: .param .b64 foo_St8x3_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x3_param_1];
; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0];
; CHECK: st.u64 [[[R1]]], [[RD1]];
; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8];
; CHECK: st.u64 [[[R1]]+8], [[RD2]];
; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16];
; CHECK: st.u64 [[[R1]]+16], [[RD3]];
; CHECK: ret;
%1 = load i64, ptr %in, align 8
store i64 %1, ptr %ret, align 8
%arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
%2 = load i64, ptr %arrayidx.1, align 8
%arrayidx3.1 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 1
store i64 %2, ptr %arrayidx3.1, align 8
%arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2
%3 = load i64, ptr %arrayidx.2, align 8
%arrayidx3.2 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 2
store i64 %3, ptr %arrayidx3.2, align 8
ret void
}
define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
; CHECK-LABEL: .visible .func foo_St8x4(
; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32],
; CHECK: .param .b64 foo_St8x4_param_1
; CHECK: )
; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x4_param_1];
; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0];
; CHECK: st.u64 [[[R1]]], [[RD1]];
; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8];
; CHECK: st.u64 [[[R1]]+8], [[RD2]];
; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16];
; CHECK: st.u64 [[[R1]]+16], [[RD3]];
; CHECK: ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24];
; CHECK: st.u64 [[[R1]]+24], [[RD4]];
; CHECK: ret;
%1 = load i64, ptr %in, align 8
store i64 %1, ptr %ret, align 8
%arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
%2 = load i64, ptr %arrayidx.1, align 8
%arrayidx3.1 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 1
store i64 %2, ptr %arrayidx3.1, align 8
%arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
%3 = load i64, ptr %arrayidx.2, align 8
%arrayidx3.2 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 2
store i64 %3, ptr %arrayidx3.2, align 8
%arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
%4 = load i64, ptr %arrayidx.3, align 8
%arrayidx3.3 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 3
store i64 %4, ptr %arrayidx3.3, align 8
ret void
}