; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
;
; ST2Q
;
; st2q, register-offset addressing, i8 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, i16 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, i32 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, i64 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, half elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8f16(
      <vscale x 8 x half> %v0, <vscale x 8 x half> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, float elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv4f32(
      <vscale x 4 x float> %v0, <vscale x 4 x float> %v1,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, double elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv2f64(
      <vscale x 2 x double> %v0, <vscale x 2 x double> %v1,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st2q, register-offset addressing, bfloat elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st2q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st2q.nxv8bf16(
      <vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, i8 elements: the pointer is moved back by 16
; scalable vectors, expected to fold into "#-16, mul vl".
define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
  call void @llvm.aarch64.sve.st2q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, i8 elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2q_si_i8_off14:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
  call void @llvm.aarch64.sve.st2q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, i16 elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, i32 elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, i64 elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, half elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x half>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8f16(
      <vscale x 8 x half> %v0, <vscale x 8 x half> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, float elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x float>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv4f32(
      <vscale x 4 x float> %v0, <vscale x 4 x float> %v1,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, double elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x double>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv2f64(
      <vscale x 2 x double> %v0, <vscale x 2 x double> %v1,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st2q, immediate addressing, bfloat elements: the pointer is advanced by 14
; scalable vectors, expected to fold into "#14, mul vl".
define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
; CHECK-LABEL: st2q_si_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
  call void @llvm.aarch64.sve.st2q.nxv8bf16(
      <vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
;
; ST3Q
;
; st3q, register-offset addressing, i8 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, i16 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, i32 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, i64 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, half elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8f16(
      <vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, float elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv4f32(
      <vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, double elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv2f64(
      <vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st3q, register-offset addressing, bfloat elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st3q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st3q.nxv8bf16(
      <vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, i8 elements: the pointer is moved back by 24
; scalable vectors, expected to fold into "#-24, mul vl".
define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off24:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
  call void @llvm.aarch64.sve.st3q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, i8 elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i8_off21:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, i16 elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, i32 elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, i64 elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, half elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8f16(
      <vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, float elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv4f32(
      <vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, double elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv2f64(
      <vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st3q, immediate addressing, bfloat elements: the pointer is advanced by 21
; scalable vectors, expected to fold into "#21, mul vl".
define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3q_si_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
  call void @llvm.aarch64.sve.st3q.nxv8bf16(
      <vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
;
; ST4Q
;
; st4q, register-offset addressing, i8 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, i16 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1,
      <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, i32 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1,
      <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, i64 elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,
      <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, half elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8f16(
      <vscale x 8 x half> %v0, <vscale x 8 x half> %v1,
      <vscale x 8 x half> %v2, <vscale x 8 x half> %v3,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, float elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv4f32(
      <vscale x 4 x float> %v0, <vscale x 4 x float> %v1,
      <vscale x 4 x float> %v2, <vscale x 4 x float> %v3,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, double elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv2f64(
      <vscale x 2 x double> %v0, <vscale x 2 x double> %v1,
      <vscale x 2 x double> %v2, <vscale x 2 x double> %v3,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; st4q, register-offset addressing, bfloat elements: %offset indexes i128 (16-byte)
; units, which the expected store expresses as "[x0, x1, lsl #4]".
define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
; CHECK-LABEL: st4q_ss_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
  %dst = getelementptr i128, ptr %addr, i64 %offset
  call void @llvm.aarch64.sve.st4q.nxv8bf16(
      <vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
      <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st4q, immediate addressing, i8 elements: the pointer is moved back by 32
; scalable vectors, expected to fold into "#-32, mul vl".
define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
  call void @llvm.aarch64.sve.st4q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st4q, immediate addressing, i8 elements: the pointer is advanced by 28
; scalable vectors, expected to fold into "#28, mul vl".
define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i8_off28:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv16i8(
      <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
      <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3,
      <vscale x 16 x i1> %pred, ptr %dst)
  ret void
}
; st4q, immediate addressing, i16 elements: the pointer is advanced by 28
; scalable vectors, expected to fold into "#28, mul vl".
define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv8i16(
      <vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1,
      <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,
      <vscale x 8 x i1> %pred, ptr %dst)
  ret void
}
; st4q, immediate addressing, i32 elements: the pointer is advanced by 28
; scalable vectors, expected to fold into "#28, mul vl".
define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv4i32(
      <vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1,
      <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3,
      <vscale x 4 x i1> %pred, ptr %dst)
  ret void
}
; st4q, immediate addressing, i64 elements: the pointer is advanced by 28
; scalable vectors, expected to fold into "#28, mul vl".
define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %dst = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv2i64(
      <vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,
      <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3,
      <vscale x 2 x i1> %pred, ptr %dst)
  ret void
}
; Scalar+immediate form of st4q for four <vscale x 8 x half> vectors. The store
; address is %addr advanced by 28 elements of the scalable vector type, which
; the assertions below expect to be folded into [x0, #28, mul vl].
define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %base)
  ret void
}
; Scalar+immediate form of st4q for four <vscale x 4 x float> vectors. The
; store address is %addr advanced by 28 elements of the scalable vector type,
; which the assertions below expect to be folded into [x0, #28, mul vl].
define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0,
                                           <vscale x 4 x float> %v1,
                                           <vscale x 4 x float> %v2,
                                           <vscale x 4 x float> %v3,
                                           <vscale x 4 x i1> %pred,
                                           ptr %base)
  ret void
}
; Scalar+immediate form of st4q for four <vscale x 2 x double> vectors. The
; store address is %addr advanced by 28 elements of the scalable vector type,
; which the assertions below expect to be folded into [x0, #28, mul vl].
define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %base)
  ret void
}
; Scalar+immediate form of st4q for four <vscale x 8 x bfloat> vectors. The
; store address is %addr advanced by 28 elements of the scalable vector type,
; which the assertions below expect to be folded into [x0, #28, mul vl].
define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4q_si_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28
  call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %base)
  ret void
}
;
; Intrinsic declarations: ST2Q (two data vectors, predicate, pointer)
;
declare void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
;
; Intrinsic declarations: ST3Q (three data vectors, predicate, pointer)
;
declare void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
;
; Intrinsic declarations: ST4Q (four data vectors, predicate, pointer)
;
declare void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)