; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s
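
; These tests cover SVE loads from a base pointer offset by both a scalable
; amount (a multiple of the vector length, VL = vscale x 16 bytes) and a fixed
; amount, applied in either order. When the fixed offset is added first, the
; scalable offset folds into the [Xn, #imm, mul vl] addressing mode; when the
; scalable offset is added first, it is materialized separately (rdvl/cnth)
; and the fixed offset becomes a register offset.

; b + 1 VL + 4 bytes: the scalable offset is materialized with rdvl and the
; fixed offset becomes a register offset.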
define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 4
%add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
%2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
ret <vscale x 16 x i8> %2
}
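
; b + 4 bytes + 1 VL: the scalable offset is added last and folds into
; [x8, #1, mul vl].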
define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x8, x0, #4
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 4
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 4
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
%2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
ret <vscale x 16 x i8> %2
}
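
; b + 1 VL + 8 bytes: rdvl plus a scaled register offset of 4 halfwords.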
define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 3
%add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
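
; b + 8 bytes + 1 VL: folds into [x8, #1, mul vl].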
define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 3
%add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
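
; b + 2 VL + 8 bytes: rdvl #2 plus a scaled register offset.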
define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rdvl x8, #2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 4
%add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
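
; b + 8 bytes + 2 VL: folds into [x8, #2, mul vl].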
define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 4
%add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
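
; b + 1 VL + 16 bytes: rdvl plus a scaled register offset of 4 words.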
define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 2
%add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
%2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
ret <vscale x 4 x i32> %2
}
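
; b + 16 bytes + 1 VL: folds into [x8, #1, mul vl].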
define <vscale x 4 x i32> @i32_16s_1v(ptr %b) {
; CHECK-LABEL: i32_16s_1v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add x8, x0, #16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 16
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 2
%add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
%2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
ret <vscale x 4 x i32> %2
}
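
; b + 1 VL + 32 bytes: rdvl plus a scaled register offset of 4 doublewords.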
define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: add x8, x0, x8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 1
%add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
%2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
ret <vscale x 2 x i64> %2
}
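
; b + 32 bytes + 1 VL: folds into [x8, #1, mul vl].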
define <vscale x 2 x i64> @i64_32s_1v(ptr %b) {
; CHECK-LABEL: i64_32s_1v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, x0, #32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 32
%0 = tail call i64 @llvm.vscale.i64()
%1 = shl nuw nsw i64 %0, 1
%add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
%2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
ret <vscale x 2 x i64> %2
}
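
; b - 2 VL + 4 bytes: the negative scalable offset is materialized with cnth
; (8 x vscale halfwords, mul #4 = 32 x vscale bytes = 2 VL) and subtracted;
; the fixed offset becomes a register offset.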
define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cnth x8, all, mul #4
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: sub x8, x0, x8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
%2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
ret <vscale x 16 x i8> %2
}
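
; b + 4 bytes - 2 VL: the scalable offset folds into [x8, #-2, mul vl].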
define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x8, x0, #4
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 4
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
%2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
ret <vscale x 16 x i8> %2
}
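
; b - 2 VL + 8 bytes: cnth/sub plus a scaled register offset.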
define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cnth x8, all, mul #4
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: sub x8, x0, x8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
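
; b + 8 bytes - 2 VL: folds into [x8, #-2, mul vl].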
define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 8
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
%2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
ret <vscale x 8 x i16> %2
}
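
; b - 2 VL + 16 bytes: cnth/sub plus a scaled register offset.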
define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cnth x8, all, mul #4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: sub x8, x0, x8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
%2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
ret <vscale x 4 x i32> %2
}
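
; b + 16 bytes - 2 VL: folds into [x8, #-2, mul vl].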
define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: add x8, x0, #16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 16
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
%2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
ret <vscale x 4 x i32> %2
}
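
; b - 2 VL + 32 bytes: cnth/sub plus a scaled register offset.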
define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cnth x8, all, mul #4
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: sub x8, x0, x8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
%2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
ret <vscale x 2 x i64> %2
}
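
; b + 32 bytes - 2 VL: folds into [x8, #-2, mul vl].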
define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, x0, #32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr %b, i64 32
%0 = tail call i64 @llvm.vscale.i64()
%1 = mul i64 %0, -32
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
%2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
ret <vscale x 2 x i64> %2
}
declare i64 @llvm.vscale.i64()