; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-ONLY
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB-PACK
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ONLY
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64-PACK
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
; Stores the constant index vector <0, 1, ..., 15>. The expected code
; materializes it with a single vid.v rather than a constant-pool load.
define void @buildvec_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
; Same index vector as above but with elements 3 and 5 undef. The expected
; code is unchanged: the undef lanes do not prevent the vid.v lowering.
define void @buildvec_vid_undefelts_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_undefelts_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
; TODO: Could do VID then insertelement on missing elements
; Element 2 holds 3 instead of 2, so the constant is not an index sequence.
; The expected code currently loads the whole vector from the constant pool.
define void @buildvec_notquite_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_notquite_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 3, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
; Stores index + 2 (<2, 3, ..., 17>). The expected code is vid.v followed by
; vadd.vi with the addend 2 as an immediate.
define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_imm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17>, ptr %x
ret void
}
; Stores index + 100 (<100, 101, ..., 115>). Unlike the +2 case, the expected
; code does not use vid.v here; it loads the vector from the constant pool.
; NOTE(review): presumably because 100 is outside the vadd.vi immediate range
; (hence "nonimm" in the name) - confirm against the lowering code.
define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115>, ptr %x
ret void
}
; Stores 3 * index (<0, 3, ..., 45>). The expected code is vid.v followed by
; vmul.vx with the scalar 3 loaded into a1.
define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>, ptr %x
ret void
}
; Returns 2 * index (<0, 2, 4, 6>). The expected code doubles the vid.v result
; by adding it to itself (vadd.vv v8, v8, v8).
define <4 x i8> @buildvec_vid_step2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 2, i8 4, i8 6>
}
; 2 * index with element 0 undef. The expected code is the same as for the
; fully defined <0, 2, 4, 6> vector.
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 2, i8 4, i8 6>
}
; 2 * index with elements 0 and 1 undef. The two trailing defined elements
; (4, 6) are still expected to produce the vid.v + vadd.vv sequence.
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>
}
; 2 * index with the two middle elements undef. Only the endpoints (0 and 6)
; are defined; the expected code is still vid.v + vadd.vv.
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>
}
; Returns 2 * index + 1 (<1, 3, 5, 7>). The expected code doubles the vid.v
; result and then adds the immediate 1 with vadd.vi.
define <4 x i8> @buildvec_vid_step2_add1_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 3, i8 5, i8 7>
}
; 2 * index + 1 with element 0 undef. The expected code matches the fully
; defined <1, 3, 5, 7> case.
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 3, i8 5, i8 7>
}
; 2 * index + 1 with elements 0 and 1 undef. The expected code matches the
; fully defined <1, 3, 5, 7> case.
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>
}
; 2 * index + 1 with the two middle elements undef (only 1 and 7 defined).
; The expected code matches the fully defined <1, 3, 5, 7> case.
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>
}
; Returns -index (<0, -1, -2, -3>). The expected code negates the vid.v result
; with a reverse subtract from 0 (vrsub.vi v8, v8, 0).
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>
}
; -index with element 0 undef. The expected code matches the fully defined
; <0, -1, -2, -3> case.
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>
}
; -index with elements 0 and 1 undef. The expected code matches the fully
; defined <0, -1, -2, -3> case.
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>
}
; -index with the two middle elements undef (only 0 and -3 defined). The
; expected code matches the fully defined <0, -1, -2, -3> case.
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>
}
; Returns -2 * index (<0, -2, -4, -6>). The expected code doubles the vid.v
; result and then negates it with vrsub.vi ..., 0.
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>
}
; -2 * index with element 0 undef. The expected code matches the fully defined
; <0, -2, -4, -6> case.
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>
}
; -2 * index with elements 0 and 1 undef. The expected code matches the fully
; defined <0, -2, -4, -6> case.
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>
}
; Only element 3 (-6) is defined here, so the expected code is a plain
; vmv.v.i splat of -6 rather than a vid.v sequence.
; NOTE(review): the other *_undef2 tests in this file keep element 0 defined
; and leave only the middle elements undef; this one leaving element 0 undef
; as well may be unintentional - confirm before changing, since the assertions
; would need regenerating.
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, -6
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>
}
; Returns 3 - 2 * index (<3, 1, -1, -3>). The expected code doubles the vid.v
; result and folds both the negation and the +3 into one vrsub.vi ..., 3.
define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 3
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
}
; Returns 3 - 3 * index (<3, 0, -3, -6>). The expected code splats the addend 3
; into v9 and uses vmadd.vx with the scalar -3 to compute index * -3 + 3.
define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a0, -3
; CHECK-NEXT: vmadd.vx v8, a0, v9
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
}
; Stores four variants of -3 - 3 * index (<-3, -6, -9, -12>) that differ only
; in which elements are undef. The expected code computes the sequence once
; (vid.v + vmadd.vx with -3 and a splat of -3) and stores the same register v9
; to all four destinations.
define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -3
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: li a4, -3
; CHECK-NEXT: vmadd.vx v9, a4, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: vse32.v v9, (a1)
; CHECK-NEXT: vse32.v v9, (a2)
; CHECK-NEXT: vse32.v v9, (a3)
; CHECK-NEXT: ret
store <4 x i32> <i32 -3, i32 -6, i32 -9, i32 -12>, ptr %z0
store <4 x i32> <i32 undef, i32 -6, i32 -9, i32 -12>, ptr %z1
store <4 x i32> <i32 undef, i32 undef, i32 -9, i32 -12>, ptr %z2
store <4 x i32> <i32 -3, i32 undef, i32 undef, i32 -12>, ptr %z3
ret void
}
; FIXME: RV32 doesn't catch this pattern due to BUILD_VECTOR legalization.
; Returns the i64 index vector <0, 1, 2, 3>. Expected code per configuration:
;  - RV64 with +v uses a single vid.v at e64.
;  - RV32 loads bytes from the constant pool and widens them with vsext.vf4.
;  - RV64 with only zve32x writes the four scalars with sd through a0
;    (presumably the indirect-return pointer - confirm).
define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, %hi(.LCPI25_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI25_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle8.v v10, (a0)
; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 3
; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: li a1, 2
; RV64ZVE32-NEXT: sd a1, 16(a0)
; RV64ZVE32-NEXT: li a1, 1
; RV64ZVE32-NEXT: sd a1, 8(a0)
; RV64ZVE32-NEXT: sd zero, 0(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
}
; Returns the i64 vector 2 * index (<0, 2, 4, 6>). Expected code per
; configuration:
;  - RV64 with +v uses vid.v and doubles it with vadd.vv.
;  - RV32 loads bytes from the constant pool and widens them with vsext.vf4.
;  - RV64 with only zve32x writes the four scalars with sd through a0
;    (presumably the indirect-return pointer - confirm).
define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a0, %hi(.LCPI26_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI26_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle8.v v10, (a0)
; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vadd.vv v8, v8, v8
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 6
; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: li a1, 4
; RV64ZVE32-NEXT: sd a1, 16(a0)
; RV64ZVE32-NEXT: li a1, 2
; RV64ZVE32-NEXT: sd a1, 8(a0)
; RV64ZVE32-NEXT: sd zero, 0(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
; <1, 3, 6, 7> has no constant stride, so no vid.v is expected. The expected
; code packs the four bytes into the 32-bit scalar 0x07060301 (lui + addi) and
; moves it into element 0 at e32 with vmv.s.x.
define <4 x i8> @buildvec_no_vid_v4i8_0() {
; CHECK-LABEL: buildvec_no_vid_v4i8_0:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28768
; CHECK-NEXT: addi a0, a0, 769
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
}
; <undef, 2, 5, 7>: the defined elements have no common stride. The expected
; code packs them into the 32-bit scalar 0x07050200 (the undef byte becomes 0)
; and inserts it with vmv.s.x at e32.
define <4 x i8> @buildvec_no_vid_v4i8_1() {
; CHECK-LABEL: buildvec_no_vid_v4i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28752
; CHECK-NEXT: addi a0, a0, 512
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 undef, i8 2, i8 5, i8 7>
}
; <0, undef, undef, 8>: the expected code does not use vid.v; it builds the
; packed 32-bit scalar 0x08000000 with a single lui and inserts it with
; vmv.s.x at e32.
define <4 x i8> @buildvec_no_vid_v4i8_2() {
; CHECK-LABEL: buildvec_no_vid_v4i8_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>
}
; <-1, undef, undef, 7>: the expected code does not use vid.v; it builds the
; packed 32-bit scalar 0x070000FF (lui + addi) and inserts it with vmv.s.x at
; e32.
define <4 x i8> @buildvec_no_vid_v4i8_3() {
; CHECK-LABEL: buildvec_no_vid_v4i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28672
; CHECK-NEXT: addi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>
}
; Only element 0 (-2) is defined. The expected code is a vmv.v.i splat of -2
; at e8, not a vid.v sequence.
define <4 x i8> @buildvec_no_vid_v4i8_4() {
; CHECK-LABEL: buildvec_no_vid_v4i8_4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, -2
; CHECK-NEXT: ret
ret <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>
}
; <-1, -2, -4, -5> has no constant stride. The expected code packs the bytes
; into the 32-bit scalar 0xFBFCFEFF (lui + addi with a negative low part) and
; inserts it with vmv.s.x at e32.
define <4 x i8> @buildvec_no_vid_v4i8_5() {
; CHECK-LABEL: buildvec_no_vid_v4i8_5:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1032144
; CHECK-NEXT: addi a0, a0, -257
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
}
; Mostly-8 vector with a single 0 at element 3 and two undef elements. The
; expected code splats the dominant value 8, then slides a zero into element 3
; with a tail-undisturbed vslideup.vi limited to VL=4.
define void @buildvec_dominant0_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_dominant0_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vmv.v.i v9, 8
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v8, 3
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 8, i16 8, i16 undef, i16 0, i16 8, i16 undef, i16 8, i16 8>, ptr %x
ret void
}
; Only element 1 is defined (8); every other element is undef. The expected
; code is a plain vmv.v.i splat of 8.
define void @buildvec_dominant1_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_dominant1_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 undef, i16 8, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, ptr %x
ret void
}
; Fully undef vector: no vector instruction is expected, only the return.
define <2 x i8> @buildvec_dominant0_v2i8() {
; CHECK-LABEL: buildvec_dominant0_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
ret <2 x i8> <i8 undef, i8 undef>
}
; <undef, -1>: the expected code is a vmv.v.i splat of -1 in every
; configuration. The only difference between the assertion groups is the LMUL
; in vsetivli: mf8 for the RV32 and RV64 +v runs, mf4 for the zve32x run.
define <2 x i8> @buildvec_dominant1_v2i8() {
; RV32-LABEL: buildvec_dominant1_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v8, -1
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant1_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vmv.v.i v8, -1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant1_v2i8:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v8, -1
; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 undef, i8 -1>
}
; <0, -1> is expected to be built as -index (vid.v + vrsub.vi ..., 0) in every
; configuration. As in the previous test, only the LMUL differs: mf8 for the
; RV32 and RV64 +v runs, mf4 for the zve32x run.
define <2 x i8> @buildvec_dominant2_v2i8() {
; RV32-LABEL: buildvec_dominant2_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vrsub.vi v8, v8, 0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant2_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vrsub.vi v8, v8, 0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant2_v2i8:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32-NEXT: vid.v v8
; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0
; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 0, i8 -1>
}
; Stores a two-element vector made of a large 64-bit constant and -1. Note
; that the function name says v2i32 but the stored type is <2 x i64>.
; Expected code per configuration:
;  - RV32 loads the value from the constant pool as four e32 elements.
;  - RV64 with +v splats -1, then overwrites element 0 with the scalar loaded
;    from the constant pool using a tail-undisturbed vmv.s.x.
;  - RV64 with only zve32x uses two scalar sd stores.
define void @buildvec_dominant0_v2i32(ptr %x) {
; RV32-LABEL: buildvec_dominant0_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI38_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI38_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant0_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI38_0)
; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v8, -1
; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64V-NEXT: vmv.s.x v8, a1
; RV64V-NEXT: vse64.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
; Same constant as buildvec_dominant0_v2i32 but with the optsize attribute
; (and, likewise, a <2 x i64> store despite the v2i32 name). The visible
; difference is in the RV64 +v assertions: instead of a splat plus vmv.s.x,
; the whole vector is loaded from the constant pool with vle64.v. The RV32 and
; zve32x expectations match the non-optsize test.
define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI39_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI39_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI39_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vle64.v v8, (a1)
; RV64V-NEXT: vse64.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
; The byte pair (1, 2) repeats across the vector (one element is undef). The
; expected code splats the 16-bit value 513 (0x0201) with vmv.v.x at e16 and
; stores the result as bytes with vse8.v.
define void @buildvec_seq_v8i8_v4i16(ptr %x) {
; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 513
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 undef, i8 2>, ptr %x
ret void
}
; The four-byte group (1, 2, 3, undef) repeats twice. The expected code
; builds the 32-bit value 0x00030201 (lui 48 + addi 513), splats it at e32 and
; then switches to e8 for the byte store. The zve32x run differs only in
; using LMUL m1 instead of mf2 for the e32 splat.
define void @buildvec_seq_v8i8_v2i32(ptr %x) {
; RV32-LABEL: buildvec_seq_v8i8_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 48
; RV32-NEXT: addi a1, a1, 513
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq_v8i8_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 48
; RV64V-NEXT: addi a1, a1, 513
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, 48
; RV64ZVE32-NEXT: addi a1, a1, 513
; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
ret void
}
; The eight-byte group 1..8 repeats twice. Expected code per configuration:
;  - RV64 with +v loads the 64-bit group as a scalar from the constant pool
;    and splats it at e64 before the byte store.
;  - RV32 and RV64 with only zve32x load the full 16-byte vector from the
;    constant pool instead.
define void @buildvec_seq_v16i8_v2i64(ptr %x) {
; RV32-LABEL: buildvec_seq_v16i8_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI42_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0)
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vle8.v v8, (a1)
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI42_0)
; RV64V-NEXT: ld a1, %lo(.LCPI42_0)(a1)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0)
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vle8.v v8, (a1)
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %x
ret void
}
; The eight-byte group (1, 2, 3, 129, -1, -1, -1, -1) repeats twice. In the
; RV32 and RV64 +v runs the expected code materializes only the low 32 bits
; (0x81030201 via lui + addi/addiw) and splats that scalar at e64; the upper
; four -1 bytes presumably come from sign extension of the scalar - confirm.
; The zve32x run loads the vector from the constant pool.
define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 528432
; RV32-NEXT: addi a1, a1, 513
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq2_v16i8_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 528432
; RV64V-NEXT: addiw a1, a1, 513
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0)
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vle8.v v8, (a1)
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, ptr %x
ret void
}
; Nine-element vector with the group (1, 2, 3) repeating three times. The
; expected code starts from a splat of 3 and applies two masked merges:
; mask 73 (bits 0, 3, 6) selects the lanes that become 1 and mask 146
; (bits 1, 4, 7) selects the lanes that become 2. VL is reduced to 9 before
; the final merge and store.
define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-LABEL: buildvec_seq_v9i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 73
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v9, 2, v0
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
ret void
}
; The i16 pair (-127, -1) repeats twice. The expected code splats the scalar
; -127 with vmv.v.x at e32 (as a 32-bit value its upper half is all ones,
; i.e. the i16 -1) and stores the result with vse16.v.
define void @buildvec_seq_v4i16_v2i32(ptr %x) {
; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, -127
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, ptr %x
ret void
}
; Stores seven variants of index / 2 (<0, 0, 1, 1>) with different undef
; patterns. The first five (%z0..%z4) are expected to share one vector built
; as vid.v followed by a logical right shift by 1. The %z5 variant is built
; separately (splat of 1 plus a slideup of zero) and the %z6 variant uses a
; splat of 0 plus vslide1down.vx; see the inline comments next to those
; stores. The RV32 and RV64 assertion groups differ only in where the
; vmv.v.i v9, 1 is placed relative to the stores.
define void @buildvec_vid_step1o2_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
; RV32-LABEL: buildvec_vid_step1o2_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vsrl.vi v8, v8, 1
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: vse32.v v8, (a4)
; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT: vslideup.vi v9, v8, 1
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vse32.v v9, (a5)
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a0, 1
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vse32.v v8, (a6)
; RV32-NEXT: ret
;
; RV64-LABEL: buildvec_vid_step1o2_v4i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vid.v v8
; RV64-NEXT: vsrl.vi v8, v8, 1
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vse32.v v8, (a1)
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: vse32.v v8, (a4)
; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64-NEXT: vslideup.vi v9, v8, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vse32.v v9, (a5)
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: li a0, 1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vse32.v v8, (a6)
; RV64-NEXT: ret
store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, ptr %z0
store <4 x i32> <i32 0, i32 0, i32 1, i32 undef>, ptr %z1
store <4 x i32> <i32 0, i32 undef, i32 1, i32 1>, ptr %z2
store <4 x i32> <i32 undef, i32 0, i32 undef, i32 1>, ptr %z3
store <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>, ptr %z4
; We don't catch this one
store <4 x i32> <i32 undef, i32 0, i32 1, i32 1>, ptr %z5
; We catch this one but as VID/3 rather than VID/2
store <4 x i32> <i32 0, i32 0, i32 undef, i32 1>, ptr %z6
ret void
}
; i16 counterpart of the previous test with an addend: seven variants of
; index / 2 + 3 (<3, 3, 4, 4>) with different undef patterns. %z0..%z4 are
; expected to share one vector built as vid.v, shift right by 1, vadd.vi 3.
; %z5 is built from a splat of 4 with a 3 slid into element 1, and %z6 from a
; splat of 3 with a 4 shifted in by vslide1down.vx; see the inline comments
; next to those stores.
define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
; CHECK-LABEL: buildvec_vid_step1o2_add3_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vadd.vi v8, v8, 3
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vse16.v v8, (a2)
; CHECK-NEXT: vse16.v v8, (a3)
; CHECK-NEXT: vse16.v v8, (a4)
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vse16.v v8, (a5)
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vslide1down.vx v8, v9, a0
; CHECK-NEXT: vse16.v v8, (a6)
; CHECK-NEXT: ret
store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
store <4 x i16> <i16 3, i16 3, i16 4, i16 undef>, ptr %z1
store <4 x i16> <i16 3, i16 undef, i16 4, i16 4>, ptr %z2
store <4 x i16> <i16 undef, i16 3, i16 undef, i16 4>, ptr %z3
store <4 x i16> <i16 3, i16 undef, i16 4, i16 undef>, ptr %z4
; We don't catch this one
store <4 x i16> <i16 undef, i16 3, i16 4, i16 4>, ptr %z5
; We catch this one but as VID/3 rather than VID/2
store <4 x i16> <i16 3, i16 3, i16 undef, i16 4>, ptr %z6
ret void
}
; Stores -5 - index / 4 (four -5 values followed by four -6 values). The
; expected code is vid.v, a right shift by 2 for the division, and a single
; vrsub.vi ..., -5 that performs both the negation and the addition.
define void @buildvec_vid_stepn1o4_addn5_v8i8(ptr %z0) {
; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vrsub.vi v8, v8, -5
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i8> <i8 -5, i8 -5, i8 -5, i8 -5, i8 -6, i8 -6, i8 -6, i8 -6>, ptr %z0
ret void
}
; Stores 17 * index (<0, 17, ..., 119>). Since 17 is not a power of two, the
; expected code multiplies the vid.v result by a scalar with vmul.vx.
define void @buildvec_vid_mpy_imm_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_vid_mpy_imm_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a1, 17
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 0, i16 17, i16 34, i16 51, i16 68, i16 85, i16 102, i16 119>, ptr %x
ret void
}
; Stores 512 * index (<0, 512, ..., 3584>). The step is a power of two, so the
; expected code uses a left shift by 9 (vsll.vi) instead of a multiply.
define void @buildvec_vid_shl_imm_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_vid_shl_imm_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsll.vi v8, v8, 9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 0, i16 512, i16 1024, i16 1536, i16 2048, i16 2560, i16 3072, i16 3584>, ptr %x
ret void
}
; Splats element 3 of %v across a <4 x i32> (extractelement, insertelement
; into lane 0, then a zero-mask shufflevector). The expected code is a single
; vrgather.vi with index 3, followed by a register copy into v8.
define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
; CHECK-LABEL: splat_c3_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vrgather.vi v9, v8, 3
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <4 x i32> %v, i32 3
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
ret <4 x i32> %splat
}
; Splats the element of %v selected by the run-time index %idx. The expected
; code is a vrgather.vx that takes the index from scalar register a0,
; followed by a register copy into v8.
define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vrgather.vx v9, v8, a0
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <4 x i32> %v, i64 %idx
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
ret <4 x i32> %splat
}
; Splats element 4 of %v across an <8 x i16>. The expected code is a single
; vrgather.vi with index 4 at e16, followed by a register copy into v8.
define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
; CHECK-LABEL: splat_c4_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrgather.vi v9, v8, 4
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <8 x i16> %v, i32 4
%ins = insertelement <8 x i16> poison, i16 %x, i32 0
%splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %splat
}
; Splats the element of the <8 x i16> %v selected by the run-time index %idx.
; The expected code is a vrgather.vx that takes the index from a0, followed by
; a register copy into v8.
define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrgather.vx v9, v8, a0
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <8 x i16> %v, i64 %idx
%ins = insertelement <8 x i16> poison, i16 %x, i32 0
%splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %splat
}
; <0, 0, 2, 3> is not an arithmetic sequence. The expected code builds the
; packed 32-bit scalar 0x03020000 with a single lui and inserts it with
; vmv.s.x at e32.
define <4 x i8> @buildvec_not_vid_v4i8_1() {
; CHECK-LABEL: buildvec_not_vid_v4i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 12320
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 0, i8 2, i8 3>
}
; <3, 3, 1, 0> is not an arithmetic sequence. The expected code builds the
; packed 32-bit scalar 0x00010303 (lui 16 + addi 771) and inserts it with
; vmv.s.x at e32.
define <4 x i8> @buildvec_not_vid_v4i8_2() {
; CHECK-LABEL: buildvec_not_vid_v4i8_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, 771
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
}
; We match this as a VID sequence (-3 / 8) + 5 but choose not to introduce
; division to compute it.
; Only elements 6 (value 3), 14 and 15 (value 0) are defined. The expected
; code splats 0 and slides a 3 into element 6 with a tail-undisturbed
; vslideup.vi limited to VL=7.
define <16 x i8> @buildvec_not_vid_v16i8() {
; CHECK-LABEL: buildvec_not_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 6
; CHECK-NEXT: ret
ret <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 3, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0>
}
; 512-element i8 vector whose values run 0..255 and then 0..255 again, i.e.
; the element index truncated to 8 bits. The expected code is still a single
; vid.v at e8/m4 with VL=512 (the function is marked vscale_range(16, 1024)).
define <512 x i8> @buildvec_vid_v512i8_indices_overflow() vscale_range(16, 1024) {
; CHECK-LABEL: buildvec_vid_v512i8_indices_overflow:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: ret
ret <512 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63, i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 128, i8 129, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137, i8 138, i8 139, i8 140, i8 141, i8 142, i8 143, i8 144, i8 145, i8 146, i8 147, i8 148, i8 149, i8 150, i8 151, i8 152, i8 153, i8 154, i8 155, i8 156, i8 157, i8 158, i8 159, i8 160, i8 161, i8 162, i8 163, i8 164, i8 165, i8 166, i8 167, i8 168, i8 169, i8 170, i8 171, i8 172, i8 173, i8 174, i8 175, i8 176, i8 177, i8 178, i8 179, i8 180, i8 181, i8 182, i8 183, i8 184, i8 185, i8 186, i8 187, i8 188, i8 189, i8 190, i8 191, i8 192, i8 193, i8 194, i8 195, i8 196, i8 197, i8 198, i8 199, i8 200, i8 201, i8 202, i8 203, i8 204, i8 205, i8 206, i8 207, i8 208, i8 209, i8 210, i8 211, i8 212, i8 213, i8 214, i8 215, i8 216, i8 217, i8 218, i8 219, i8 220, i8 221, i8 222, i8 223, i8 224, i8 225, i8 226, i8 227, i8 228, i8 229, i8 230, i8 231, i8 232, i8 233, i8 234, i8 235, i8 236, i8 237, i8 238, i8 239, i8 240, i8 241, i8 242, i8 243, i8 244, i8 245, i8 246, i8 247, i8 248, i8 249, i8 250, i8 251, i8 252, i8 253, i8 254, i8 255, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 
7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63, i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 128, i8 129, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137, i8 138, i8 139, i8 140, i8 141, i8 142, i8 143, i8 144, i8 145, i8 146, i8 147, i8 148, i8 149, i8 150, i8 151, i8 152, i8 153, i8 154, i8 155, i8 156, i8 157, i8 158, i8 159, i8 160, i8 161, i8 162, i8 163, i8 164, i8 165, i8 166, i8 167, i8 168, i8 169, i8 170, i8 171, i8 172, i8 173, i8 174, i8 175, i8 176, i8 177, i8 178, i8 179, i8 180, i8 181, i8 182, i8 183, i8 184, i8 185, i8 186, i8 187, i8 188, i8 189, i8 190, i8 191, i8 192, i8 193, i8 194, i8 195, i8 196, i8 197, i8 198, i8 199, i8 200, i8 201, i8 202, i8 203, i8 204, i8 205, i8 206, i8 207, i8 208, i8 209, i8 210, i8 211, i8 212, i8 213, i8 214, i8 215, i8 216, i8 217, i8 218, i8 219, i8 220, i8 221, i8 222, i8 223, i8 224, i8 225, i8 226, i8 227, i8 228, i8 229, i8 230, i8 231, i8 232, i8 233, i8 234, i8 235, i8 236, i8 237, i8 238, i8 239, i8 240, i8 241, i8 242, i8 243, i8 244, i8 245, i8 246, i8 247, i8 248, i8 249, i8 250, i8 251, i8 252, i8 253, i8 254, i8 255>
}
define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, 1024) {
; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 512
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vsrl.vi v8, v8, 3
; RV32-NEXT: vadd.vi v0, v8, -1
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v8, 1
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV64V: # %bb.0:
; RV64V-NEXT: li a0, 512
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vsrl.vi v8, v8, 2
; RV64V-NEXT: vadd.vi v0, v8, -1
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmv.v.i v8, 1
; RV64V-NEXT: vmerge.vim v8, v8, 0, v0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vid.v v8
; RV64ZVE32-NEXT: vsrl.vi v8, v8, 3
; RV64ZVE32-NEXT: vadd.vi v0, v8, -1
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v8, 1
; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
; RV64ZVE32-NEXT: ret
ret <512 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 
1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, 1024) {
; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vmerge.vim v10, v9, -1, v0
; RV32-NEXT: li a0, 512
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v12, 3
; RV32-NEXT: li a1, 240
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v12, v12, 0, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v10, v9, -1, v0
; RV32-NEXT: li a1, 15
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: vmv.s.x v8, a1
; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v12, v12, 1, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v8, v9, -1, v0
; RV32-NEXT: vmv1r.v v0, v8
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v8, v12, 2, v0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 3
; RV64V-NEXT: vmv.v.i v9, 0
; RV64V-NEXT: vmerge.vim v10, v9, -1, v0
; RV64V-NEXT: li a0, 512
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmv.v.i v12, 3
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64V-NEXT: vmv.v.i v8, 12
; RV64V-NEXT: vmv1r.v v0, v10
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v12, v12, 0, v0
; RV64V-NEXT: vmv1r.v v0, v8
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmerge.vim v10, v9, -1, v0
; RV64V-NEXT: li a1, 48
; RV64V-NEXT: vmv.s.x v8, a1
; RV64V-NEXT: vmv.v.v v0, v10
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v12, v12, 1, v0
; RV64V-NEXT: vmv1r.v v0, v8
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmerge.vim v8, v9, -1, v0
; RV64V-NEXT: vmv.v.v v0, v8
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v8, v12, 2, v0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v0, 15
; RV64ZVE32-NEXT: vmv.v.i v9, 0
; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v12, 3
; RV64ZVE32-NEXT: li a1, 240
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v8, a1
; RV64ZVE32-NEXT: vmv1r.v v0, v10
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0
; RV64ZVE32-NEXT: vmv1r.v v0, v8
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0
; RV64ZVE32-NEXT: li a1, 15
; RV64ZVE32-NEXT: slli a1, a1, 8
; RV64ZVE32-NEXT: vmv.s.x v8, a1
; RV64ZVE32-NEXT: vmv.v.v v0, v10
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0
; RV64ZVE32-NEXT: vmv1r.v v0, v8
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v9, -1, v0
; RV64ZVE32-NEXT: vmv.v.v v0, v8
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0
; RV64ZVE32-NEXT: ret
ret <512 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 
2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
}
; Inserts %a, %b, %c and %d into lanes 0-3 of %vin; lanes 4-7 of %vin are not
; written by the IR. The checks below expect tail-undisturbed (tu) vsetivli
; settings whose VL only just covers the lane being written by each vslideup.
define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: prefix_overwrite:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%v0 = insertelement <8 x i32> %vin, i32 %a, i32 0
%v1 = insertelement <8 x i32> %v0, i32 %b, i32 1
%v2 = insertelement <8 x i32> %v1, i32 %c, i32 2
%v3 = insertelement <8 x i32> %v2, i32 %d, i32 3
ret <8 x i32> %v3
}
; Inserts %a, %b, %c and %d into lanes 4-7 of %vin; lanes 0-3 of %vin are not
; written by the IR. The checks below expect one vslideup per lane with
; VL = lane index + 1. The first three use the tail-undisturbed (tu) policy,
; while the last one (VL = 8, the full vector) uses ta.
define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: suffix_overwrite:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: ret
%v0 = insertelement <8 x i32> %vin, i32 %a, i32 4
%v1 = insertelement <8 x i32> %v0, i32 %b, i32 5
%v2 = insertelement <8 x i32> %v1, i32 %c, i32 6
%v3 = insertelement <8 x i32> %v2, i32 %d, i32 7
ret <8 x i32> %v3
}
; Builds a <4 x i64> from four scalar arguments, with vscale pinned to exactly 2
; by vscale_range(2,2). The checks for the V configurations expect the two
; result registers (v8 and v9) to be built independently, each with its own
; vmv.v.x/vslide1down.vx chain. On RV32 every i64 arrives as two 32-bit
; registers, so the chains run at e32 with four elements per register.
; For the zve32x configuration the checks expect four scalar stores through a0;
; presumably the vector is returned indirectly in memory there -- TODO confirm.
define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v4xi64_exact:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v4xi64_exact:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v4xi64_exact:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i64> poison, i64 %a, i32 0
%v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <4 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <4 x i64> %v3, i64 %d, i32 3
ret <4 x i64> %v4
}
; Builds an <8 x i64> from eight scalar arguments, with vscale pinned to exactly
; 2 by vscale_range(2,2). The checks for the V configurations expect the four
; result registers (v8-v11) to be built independently, each with its own
; vmv.v.x/vslide1down.vx chain. On RV32 the checks additionally expect eight
; 32-bit words to be reloaded from the stack and s0 to be spilled and restored
; around the sequence.
; For the zve32x configuration the checks expect eight scalar stores through a0
; (the last value is first loaded from the stack); presumably the vector is
; returned indirectly in memory there -- TODO confirm.
define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: lw t0, 44(sp)
; RV32-NEXT: lw t1, 40(sp)
; RV32-NEXT: lw t2, 36(sp)
; RV32-NEXT: lw t3, 32(sp)
; RV32-NEXT: lw t4, 28(sp)
; RV32-NEXT: lw t5, 24(sp)
; RV32-NEXT: lw t6, 20(sp)
; RV32-NEXT: lw s0, 16(sp)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vmv.v.x v10, s0
; RV32-NEXT: vslide1down.vx v10, v10, t6
; RV32-NEXT: vslide1down.vx v10, v10, t5
; RV32-NEXT: vslide1down.vx v10, v10, t4
; RV32-NEXT: vmv.v.x v11, t3
; RV32-NEXT: vslide1down.vx v11, v11, t2
; RV32-NEXT: vslide1down.vx v11, v11, t1
; RV32-NEXT: vslide1down.vx v11, v11, t0
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: vmv.v.x v10, a4
; RV64V-NEXT: vslide1down.vx v10, v10, a5
; RV64V-NEXT: vmv.v.x v11, a6
; RV64V-NEXT: vslide1down.vx v11, v11, a7
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: ld t0, 0(sp)
; RV64ZVE32-NEXT: sd t0, 56(a0)
; RV64ZVE32-NEXT: sd a7, 48(a0)
; RV64ZVE32-NEXT: sd a6, 40(a0)
; RV64ZVE32-NEXT: sd a5, 32(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
%v5 = insertelement <8 x i64> %v4, i64 %e, i32 4
%v6 = insertelement <8 x i64> %v5, i64 %f, i32 5
%v7 = insertelement <8 x i64> %v6, i64 %g, i32 6
%v8 = insertelement <8 x i64> %v7, i64 %h, i32 7
ret <8 x i64> %v8
}
; Builds an <8 x i64> whose lanes 4-7 repeat the values of lanes 0-3
; (%a, %b, %c, %d), with vscale pinned to exactly 2 by vscale_range(2,2).
; The checks for the V configurations expect v8 and v9 to be built once with
; vmv.v.x/vslide1down.vx chains and then copied into v10 and v11 with vmv.v.v,
; rather than rebuilding the upper half from scalars.
; For the zve32x configuration the checks expect the four scalars to be stored
; twice through a0 (offsets 0-24 and 32-56).
define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_equal_halves:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vmv.v.v v10, v8
; RV32-NEXT: vmv.v.v v11, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_equal_halves:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: vmv.v.v v10, v8
; RV64V-NEXT: vmv.v.v v11, v9
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: sd a3, 48(a0)
; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a1, 32(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
%v5 = insertelement <8 x i64> %v4, i64 %a, i32 4
%v6 = insertelement <8 x i64> %v5, i64 %b, i32 5
%v7 = insertelement <8 x i64> %v6, i64 %c, i32 6
%v8 = insertelement <8 x i64> %v7, i64 %d, i32 7
ret <8 x i64> %v8
}
; Builds an <8 x i64> in which only lanes 0-3 are defined (%a, %b, %c, %d);
; lanes 4-7 stay poison. vscale is pinned to exactly 2 by vscale_range(2,2).
; The checks for the V configurations expect only v8 and v9 to be written,
; with no instructions touching v10 or v11.
; For the zve32x configuration the checks expect only the four stores for the
; defined lanes (offsets 0-24 from a0).
define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_undef_suffix:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_undef_suffix:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
ret <8 x i64> %v4
}
; Builds an <8 x i64> in which only lanes 4-7 are defined (%a, %b, %c, %d);
; lanes 0-3 stay poison. vscale is pinned to exactly 2 by vscale_range(2,2).
; The checks for the V configurations expect the final results to land in v10
; and v11 only; v8 is used just as a scratch register for the slide chains.
; For the zve32x configuration the checks expect only the four stores for the
; defined lanes (offsets 32-56 from a0).
define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_undef_prefix:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v11, v8, a7
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v10, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_undef_prefix:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v11, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v10, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: sd a3, 48(a0)
; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a1, 32(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 4
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 5
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 6
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
ret <8 x i64> %v4
}
; Builds a <16 x i8> from sixteen scalar i8 loads at byte offsets 0-15 from %p,
; inserted in ascending lane order.
; NOTE(review): "contigous" in the function name is a misspelling of
; "contiguous"; it is kept as-is because the autogenerated label checks below
; are keyed on the function name.
; Expected lowerings differ by configuration:
;  - Plain V (RV32 and RV64) and zve32x -- every byte is fed through
;    vslide1down.vx in two chains (v9 and v8) that are combined with a masked
;    vslidedown.vi; s0 is spilled to hold one of the sixteen scalars.
;  - With zba/zbb on RV32, or rva22u64 on RV64 -- the bytes are first merged
;    into 32-bit or 64-bit scalars with slli/or, and the vector is then built
;    at e32 (four elements) or e64 (two elements).
;  - With zbkb in addition -- the scalar merging uses packh/pack (plus packw on
;    RV64) instead of slli/or.
; None of the expected sequences folds the sixteen byte loads into a single
; vector load.
define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_contigous:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: addi sp, sp, -16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-ONLY-NEXT: .cfi_offset s0, -4
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 2(a0)
; RV32-ONLY-NEXT: lbu a4, 3(a0)
; RV32-ONLY-NEXT: lbu a5, 4(a0)
; RV32-ONLY-NEXT: lbu a6, 5(a0)
; RV32-ONLY-NEXT: lbu a7, 6(a0)
; RV32-ONLY-NEXT: lbu t0, 7(a0)
; RV32-ONLY-NEXT: lbu t1, 8(a0)
; RV32-ONLY-NEXT: lbu t2, 9(a0)
; RV32-ONLY-NEXT: lbu t3, 10(a0)
; RV32-ONLY-NEXT: lbu t4, 11(a0)
; RV32-ONLY-NEXT: lbu t5, 12(a0)
; RV32-ONLY-NEXT: lbu t6, 13(a0)
; RV32-ONLY-NEXT: lbu s0, 14(a0)
; RV32-ONLY-NEXT: lbu a0, 15(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV32-ONLY-NEXT: vmv.v.x v8, t1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV32-ONLY-NEXT: li a1, 255
; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, a1
; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: addi sp, sp, 16
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_contigous:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: lbu a3, 2(a0)
; RV32VB-NEXT: lbu a4, 3(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: lbu a2, 5(a0)
; RV32VB-NEXT: lbu a3, 4(a0)
; RV32VB-NEXT: lbu a4, 6(a0)
; RV32VB-NEXT: lbu a5, 7(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a5, a5, 24
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: or a2, a2, a4
; RV32VB-NEXT: lbu a3, 9(a0)
; RV32VB-NEXT: lbu a4, 8(a0)
; RV32VB-NEXT: lbu a5, 10(a0)
; RV32VB-NEXT: lbu a6, 11(a0)
; RV32VB-NEXT: slli a3, a3, 8
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli a6, a6, 24
; RV32VB-NEXT: or a4, a6, a5
; RV32VB-NEXT: or a3, a3, a4
; RV32VB-NEXT: lbu a4, 13(a0)
; RV32VB-NEXT: lbu a5, 12(a0)
; RV32VB-NEXT: lbu a6, 14(a0)
; RV32VB-NEXT: lbu a0, 15(a0)
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a6
; RV32VB-NEXT: or a0, a4, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_contigous:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 2(a0)
; RV32VB-PACK-NEXT: lbu a4, 3(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 4(a0)
; RV32VB-PACK-NEXT: lbu a3, 5(a0)
; RV32VB-PACK-NEXT: lbu a4, 6(a0)
; RV32VB-PACK-NEXT: lbu a5, 7(a0)
; RV32VB-PACK-NEXT: lbu a6, 8(a0)
; RV32VB-PACK-NEXT: lbu a7, 9(a0)
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a4, a5
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a6, a7
; RV32VB-PACK-NEXT: lbu a4, 10(a0)
; RV32VB-PACK-NEXT: lbu a5, 11(a0)
; RV32VB-PACK-NEXT: lbu a6, 12(a0)
; RV32VB-PACK-NEXT: lbu a7, 13(a0)
; RV32VB-PACK-NEXT: lbu t0, 14(a0)
; RV32VB-PACK-NEXT: lbu a0, 15(a0)
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: pack a3, a3, a4
; RV32VB-PACK-NEXT: packh a4, a6, a7
; RV32VB-PACK-NEXT: packh a0, t0, a0
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: addi sp, sp, -16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64V-ONLY-NEXT: .cfi_offset s0, -8
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 2(a0)
; RV64V-ONLY-NEXT: lbu a4, 3(a0)
; RV64V-ONLY-NEXT: lbu a5, 4(a0)
; RV64V-ONLY-NEXT: lbu a6, 5(a0)
; RV64V-ONLY-NEXT: lbu a7, 6(a0)
; RV64V-ONLY-NEXT: lbu t0, 7(a0)
; RV64V-ONLY-NEXT: lbu t1, 8(a0)
; RV64V-ONLY-NEXT: lbu t2, 9(a0)
; RV64V-ONLY-NEXT: lbu t3, 10(a0)
; RV64V-ONLY-NEXT: lbu t4, 11(a0)
; RV64V-ONLY-NEXT: lbu t5, 12(a0)
; RV64V-ONLY-NEXT: lbu t6, 13(a0)
; RV64V-ONLY-NEXT: lbu s0, 14(a0)
; RV64V-ONLY-NEXT: lbu a0, 15(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV64V-ONLY-NEXT: vmv.v.x v8, t1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV64V-ONLY-NEXT: li a1, 255
; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, a1
; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: addi sp, sp, 16
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_contigous:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: lbu a3, 2(a0)
; RVA22U64-NEXT: lbu a4, 3(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a2, 4(a0)
; RVA22U64-NEXT: or a1, a1, a3
; RVA22U64-NEXT: lbu a3, 5(a0)
; RVA22U64-NEXT: lbu a4, 6(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a5, 7(a0)
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 9(a0)
; RVA22U64-NEXT: lbu a3, 8(a0)
; RVA22U64-NEXT: lbu a4, 10(a0)
; RVA22U64-NEXT: lbu a5, 11(a0)
; RVA22U64-NEXT: slli a2, a2, 8
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 16
; RVA22U64-NEXT: slli a5, a5, 24
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: lbu a3, 12(a0)
; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: lbu a4, 13(a0)
; RVA22U64-NEXT: lbu a5, 14(a0)
; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: lbu a0, 15(a0)
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: slli a5, a5, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a5
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_contigous:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a3, 2(a0)
; RVA22U64-PACK-NEXT: lbu a4, 3(a0)
; RVA22U64-PACK-NEXT: packh a1, a1, a2
; RVA22U64-PACK-NEXT: packh a2, a3, a4
; RVA22U64-PACK-NEXT: lbu a3, 4(a0)
; RVA22U64-PACK-NEXT: lbu a4, 5(a0)
; RVA22U64-PACK-NEXT: packw a6, a1, a2
; RVA22U64-PACK-NEXT: lbu a2, 6(a0)
; RVA22U64-PACK-NEXT: lbu a5, 7(a0)
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: lbu a4, 8(a0)
; RVA22U64-PACK-NEXT: lbu a1, 9(a0)
; RVA22U64-PACK-NEXT: packh a2, a2, a5
; RVA22U64-PACK-NEXT: packw a2, a3, a2
; RVA22U64-PACK-NEXT: pack a6, a6, a2
; RVA22U64-PACK-NEXT: packh a7, a4, a1
; RVA22U64-PACK-NEXT: lbu a3, 10(a0)
; RVA22U64-PACK-NEXT: lbu a4, 11(a0)
; RVA22U64-PACK-NEXT: lbu a5, 12(a0)
; RVA22U64-PACK-NEXT: lbu a2, 13(a0)
; RVA22U64-PACK-NEXT: lbu a1, 14(a0)
; RVA22U64-PACK-NEXT: lbu a0, 15(a0)
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: packw a3, a7, a3
; RVA22U64-PACK-NEXT: packh a2, a5, a2
; RVA22U64-PACK-NEXT: packh a0, a1, a0
; RVA22U64-PACK-NEXT: packw a0, a2, a0
; RVA22U64-PACK-NEXT: pack a0, a3, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: addi sp, sp, -16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64ZVE32-NEXT: .cfi_offset s0, -8
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 2(a0)
; RV64ZVE32-NEXT: lbu a4, 3(a0)
; RV64ZVE32-NEXT: lbu a5, 4(a0)
; RV64ZVE32-NEXT: lbu a6, 5(a0)
; RV64ZVE32-NEXT: lbu a7, 6(a0)
; RV64ZVE32-NEXT: lbu t0, 7(a0)
; RV64ZVE32-NEXT: lbu t1, 8(a0)
; RV64ZVE32-NEXT: lbu t2, 9(a0)
; RV64ZVE32-NEXT: lbu t3, 10(a0)
; RV64ZVE32-NEXT: lbu t4, 11(a0)
; RV64ZVE32-NEXT: lbu t5, 12(a0)
; RV64ZVE32-NEXT: lbu t6, 13(a0)
; RV64ZVE32-NEXT: lbu s0, 14(a0)
; RV64ZVE32-NEXT: lbu a0, 15(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0
; RV64ZVE32-NEXT: vmv.v.x v8, t1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
; RV64ZVE32-NEXT: li a1, 255
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: addi sp, sp, 16
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 2
%p4 = getelementptr i8, ptr %p, i32 3
%p5 = getelementptr i8, ptr %p, i32 4
%p6 = getelementptr i8, ptr %p, i32 5
%p7 = getelementptr i8, ptr %p, i32 6
%p8 = getelementptr i8, ptr %p, i32 7
%p9 = getelementptr i8, ptr %p, i32 8
%p10 = getelementptr i8, ptr %p, i32 9
%p11 = getelementptr i8, ptr %p, i32 10
%p12 = getelementptr i8, ptr %p, i32 11
%p13 = getelementptr i8, ptr %p, i32 12
%p14 = getelementptr i8, ptr %p, i32 13
%p15 = getelementptr i8, ptr %p, i32 14
%p16 = getelementptr i8, ptr %p, i32 15
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
; Build a <16 x i8> from sixteen scalar byte loads at scattered, non-contiguous
; offsets from %p (0, 1, 22, 31, 44, 55, 623, 75, 82, 93, 105, 161, 124, 163,
; 144, 154), inserted into lanes 0 through 15 in that order. Every
; configuration is expected to keep sixteen separate lbu loads.
;
; With only the vector extension (riscv32/riscv64 +v, and zve32x) the expected
; code builds two 8-value vmv.v.x/vslide1down.vx chains and merges them with a
; masked vslidedown.vi (mask value 255 in v0). The configurations with scalar
; bit-manipulation extensions instead pack the bytes in scalar registers
; (slli/or, or packh/pack/packw when zbkb is enabled) and slide in four 32-bit
; elements on riscv32 or two 64-bit elements on riscv64.
define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_gather:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: addi sp, sp, -16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-ONLY-NEXT: .cfi_offset s0, -4
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
; RV32-ONLY-NEXT: lbu a5, 44(a0)
; RV32-ONLY-NEXT: lbu a6, 55(a0)
; RV32-ONLY-NEXT: lbu a7, 623(a0)
; RV32-ONLY-NEXT: lbu t0, 75(a0)
; RV32-ONLY-NEXT: lbu t1, 82(a0)
; RV32-ONLY-NEXT: lbu t2, 93(a0)
; RV32-ONLY-NEXT: lbu t3, 105(a0)
; RV32-ONLY-NEXT: lbu t4, 161(a0)
; RV32-ONLY-NEXT: lbu t5, 124(a0)
; RV32-ONLY-NEXT: lbu t6, 163(a0)
; RV32-ONLY-NEXT: lbu s0, 144(a0)
; RV32-ONLY-NEXT: lbu a0, 154(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV32-ONLY-NEXT: vmv.v.x v8, t1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV32-ONLY-NEXT: li a1, 255
; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, a1
; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: addi sp, sp, 16
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_gather:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: lbu a3, 22(a0)
; RV32VB-NEXT: lbu a4, 31(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: lbu a2, 55(a0)
; RV32VB-NEXT: lbu a3, 44(a0)
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a5, 75(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a5, a5, 24
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: or a2, a2, a4
; RV32VB-NEXT: lbu a3, 93(a0)
; RV32VB-NEXT: lbu a4, 82(a0)
; RV32VB-NEXT: lbu a5, 105(a0)
; RV32VB-NEXT: lbu a6, 161(a0)
; RV32VB-NEXT: slli a3, a3, 8
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli a6, a6, 24
; RV32VB-NEXT: or a4, a6, a5
; RV32VB-NEXT: or a3, a3, a4
; RV32VB-NEXT: lbu a4, 163(a0)
; RV32VB-NEXT: lbu a5, 124(a0)
; RV32VB-NEXT: lbu a6, 144(a0)
; RV32VB-NEXT: lbu a0, 154(a0)
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a6
; RV32VB-NEXT: or a0, a4, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_gather:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 22(a0)
; RV32VB-PACK-NEXT: lbu a4, 31(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 44(a0)
; RV32VB-PACK-NEXT: lbu a3, 55(a0)
; RV32VB-PACK-NEXT: lbu a4, 623(a0)
; RV32VB-PACK-NEXT: lbu a5, 75(a0)
; RV32VB-PACK-NEXT: lbu a6, 82(a0)
; RV32VB-PACK-NEXT: lbu a7, 93(a0)
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a4, a5
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a6, a7
; RV32VB-PACK-NEXT: lbu a4, 105(a0)
; RV32VB-PACK-NEXT: lbu a5, 161(a0)
; RV32VB-PACK-NEXT: lbu a6, 124(a0)
; RV32VB-PACK-NEXT: lbu a7, 163(a0)
; RV32VB-PACK-NEXT: lbu t0, 144(a0)
; RV32VB-PACK-NEXT: lbu a0, 154(a0)
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: pack a3, a3, a4
; RV32VB-PACK-NEXT: packh a4, a6, a7
; RV32VB-PACK-NEXT: packh a0, t0, a0
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: addi sp, sp, -16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64V-ONLY-NEXT: .cfi_offset s0, -8
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
; RV64V-ONLY-NEXT: lbu a5, 44(a0)
; RV64V-ONLY-NEXT: lbu a6, 55(a0)
; RV64V-ONLY-NEXT: lbu a7, 623(a0)
; RV64V-ONLY-NEXT: lbu t0, 75(a0)
; RV64V-ONLY-NEXT: lbu t1, 82(a0)
; RV64V-ONLY-NEXT: lbu t2, 93(a0)
; RV64V-ONLY-NEXT: lbu t3, 105(a0)
; RV64V-ONLY-NEXT: lbu t4, 161(a0)
; RV64V-ONLY-NEXT: lbu t5, 124(a0)
; RV64V-ONLY-NEXT: lbu t6, 163(a0)
; RV64V-ONLY-NEXT: lbu s0, 144(a0)
; RV64V-ONLY-NEXT: lbu a0, 154(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV64V-ONLY-NEXT: vmv.v.x v8, t1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV64V-ONLY-NEXT: li a1, 255
; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, a1
; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: addi sp, sp, 16
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_gather:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: lbu a3, 22(a0)
; RVA22U64-NEXT: lbu a4, 31(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: or a1, a1, a3
; RVA22U64-NEXT: lbu a3, 55(a0)
; RVA22U64-NEXT: lbu a4, 623(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a5, 75(a0)
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 93(a0)
; RVA22U64-NEXT: lbu a3, 82(a0)
; RVA22U64-NEXT: lbu a4, 105(a0)
; RVA22U64-NEXT: lbu a5, 161(a0)
; RVA22U64-NEXT: slli a2, a2, 8
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 16
; RVA22U64-NEXT: slli a5, a5, 24
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: lbu a3, 124(a0)
; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: lbu a4, 163(a0)
; RVA22U64-NEXT: lbu a5, 144(a0)
; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: lbu a0, 154(a0)
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: slli a5, a5, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a5
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_gather:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a3, 22(a0)
; RVA22U64-PACK-NEXT: lbu a4, 31(a0)
; RVA22U64-PACK-NEXT: packh a1, a1, a2
; RVA22U64-PACK-NEXT: packh a2, a3, a4
; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
; RVA22U64-PACK-NEXT: packw a6, a1, a2
; RVA22U64-PACK-NEXT: lbu a2, 623(a0)
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
; RVA22U64-PACK-NEXT: lbu a1, 93(a0)
; RVA22U64-PACK-NEXT: packh a2, a2, a5
; RVA22U64-PACK-NEXT: packw a2, a3, a2
; RVA22U64-PACK-NEXT: pack a6, a6, a2
; RVA22U64-PACK-NEXT: packh a7, a4, a1
; RVA22U64-PACK-NEXT: lbu a3, 105(a0)
; RVA22U64-PACK-NEXT: lbu a4, 161(a0)
; RVA22U64-PACK-NEXT: lbu a5, 124(a0)
; RVA22U64-PACK-NEXT: lbu a2, 163(a0)
; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: packw a3, a7, a3
; RVA22U64-PACK-NEXT: packh a2, a5, a2
; RVA22U64-PACK-NEXT: packh a0, a1, a0
; RVA22U64-PACK-NEXT: packw a0, a2, a0
; RVA22U64-PACK-NEXT: pack a0, a3, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: addi sp, sp, -16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64ZVE32-NEXT: .cfi_offset s0, -8
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
; RV64ZVE32-NEXT: lbu a5, 44(a0)
; RV64ZVE32-NEXT: lbu a6, 55(a0)
; RV64ZVE32-NEXT: lbu a7, 623(a0)
; RV64ZVE32-NEXT: lbu t0, 75(a0)
; RV64ZVE32-NEXT: lbu t1, 82(a0)
; RV64ZVE32-NEXT: lbu t2, 93(a0)
; RV64ZVE32-NEXT: lbu t3, 105(a0)
; RV64ZVE32-NEXT: lbu t4, 161(a0)
; RV64ZVE32-NEXT: lbu t5, 124(a0)
; RV64ZVE32-NEXT: lbu t6, 163(a0)
; RV64ZVE32-NEXT: lbu s0, 144(a0)
; RV64ZVE32-NEXT: lbu a0, 154(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0
; RV64ZVE32-NEXT: vmv.v.x v8, t1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
; RV64ZVE32-NEXT: li a1, 255
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: addi sp, sp, 16
; RV64ZVE32-NEXT: ret
; Byte addresses for lanes 1 through 15; lane 0 is loaded directly from %p.
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%p13 = getelementptr i8, ptr %p, i32 124
%p14 = getelementptr i8, ptr %p, i32 163
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
; One scalar i8 load per lane.
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
; Insert the loaded bytes into lanes 0 through 15, starting from poison.
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
; Build a <16 x i8> in which only the upper half is defined: lanes 8 through 15
; are filled from byte loads at offsets 82, 93, 105, 161, 124, 163, 144, 154
; from %p, and lanes 0 through 7 are left as poison.
;
; With only the vector extension (and for zve32x) the expected code is a single
; vmv.v.x/vslide1down.vx chain of eight values, which leaves them in the top
; eight lanes of the 16-lane vector.
; NOTE(review): the scalar-packing configurations still materialize the
; don't-care low elements (vmv.v.i 0 and a vslide1down.vx with zero, or extra
; packh/pack/packw results when zbkb is enabled) - presumably a missed
; optimization rather than required behavior; confirm before relying on it.
define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_low_half:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 82(a0)
; RV32-ONLY-NEXT: lbu a2, 93(a0)
; RV32-ONLY-NEXT: lbu a3, 105(a0)
; RV32-ONLY-NEXT: lbu a4, 161(a0)
; RV32-ONLY-NEXT: lbu a5, 124(a0)
; RV32-ONLY-NEXT: lbu a6, 163(a0)
; RV32-ONLY-NEXT: lbu a7, 144(a0)
; RV32-ONLY-NEXT: lbu a0, 154(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_low_half:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 93(a0)
; RV32VB-NEXT: lbu a2, 82(a0)
; RV32VB-NEXT: lbu a3, 105(a0)
; RV32VB-NEXT: lbu a4, 161(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: lbu a2, 163(a0)
; RV32VB-NEXT: lbu a3, 124(a0)
; RV32VB-NEXT: lbu a4, 144(a0)
; RV32VB-NEXT: lbu a0, 154(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: or a0, a2, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.i v8, 0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 82(a0)
; RV32VB-PACK-NEXT: lbu a2, 93(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 105(a0)
; RV32VB-PACK-NEXT: lbu a3, 161(a0)
; RV32VB-PACK-NEXT: lbu a4, 124(a0)
; RV32VB-PACK-NEXT: lbu a5, 163(a0)
; RV32VB-PACK-NEXT: lbu a6, 144(a0)
; RV32VB-PACK-NEXT: lbu a0, 154(a0)
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a4, a5
; RV32VB-PACK-NEXT: packh a0, a6, a0
; RV32VB-PACK-NEXT: pack a0, a2, a0
; RV32VB-PACK-NEXT: packh a2, a0, a0
; RV32VB-PACK-NEXT: pack a2, a2, a2
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 82(a0)
; RV64V-ONLY-NEXT: lbu a2, 93(a0)
; RV64V-ONLY-NEXT: lbu a3, 105(a0)
; RV64V-ONLY-NEXT: lbu a4, 161(a0)
; RV64V-ONLY-NEXT: lbu a5, 124(a0)
; RV64V-ONLY-NEXT: lbu a6, 163(a0)
; RV64V-ONLY-NEXT: lbu a7, 144(a0)
; RV64V-ONLY-NEXT: lbu a0, 154(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 93(a0)
; RVA22U64-NEXT: lbu a2, 82(a0)
; RVA22U64-NEXT: lbu a3, 105(a0)
; RVA22U64-NEXT: lbu a4, 161(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a2, 124(a0)
; RVA22U64-NEXT: or a1, a1, a3
; RVA22U64-NEXT: lbu a3, 163(a0)
; RVA22U64-NEXT: lbu a4, 144(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a0, 154(a0)
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.i v8, 0
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 82(a0)
; RVA22U64-PACK-NEXT: lbu a2, 93(a0)
; RVA22U64-PACK-NEXT: packh a6, a1, a2
; RVA22U64-PACK-NEXT: lbu a2, 105(a0)
; RVA22U64-PACK-NEXT: lbu a3, 161(a0)
; RVA22U64-PACK-NEXT: lbu a4, 124(a0)
; RVA22U64-PACK-NEXT: lbu a5, 163(a0)
; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packw a2, a6, a2
; RVA22U64-PACK-NEXT: packh a3, a4, a5
; RVA22U64-PACK-NEXT: packh a0, a1, a0
; RVA22U64-PACK-NEXT: packw a0, a3, a0
; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: packh a1, a0, a0
; RVA22U64-PACK-NEXT: packw a1, a1, a1
; RVA22U64-PACK-NEXT: pack a1, a1, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_low_half:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 82(a0)
; RV64ZVE32-NEXT: lbu a2, 93(a0)
; RV64ZVE32-NEXT: lbu a3, 105(a0)
; RV64ZVE32-NEXT: lbu a4, 161(a0)
; RV64ZVE32-NEXT: lbu a5, 124(a0)
; RV64ZVE32-NEXT: lbu a6, 163(a0)
; RV64ZVE32-NEXT: lbu a7, 144(a0)
; RV64ZVE32-NEXT: lbu a0, 154(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: ret
; Byte addresses for lanes 8 through 15 only.
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%p13 = getelementptr i8, ptr %p, i32 124
%p14 = getelementptr i8, ptr %p, i32 163
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
; The insert chain starts at lane 8, so lanes 0 through 7 stay poison.
%v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
; Build a <16 x i8> in which only the lower half is defined: lanes 0 through 7
; are filled from byte loads at offsets 0, 1, 22, 31, 44, 55, 623, 75 from %p,
; and lanes 8 through 15 are never written (the returned value is %v8, the
; result of the eighth insert into a poison vector).
;
; With only the vector extension (and for zve32x) the expected code slides the
; eight values in with a vmv.v.x/vslide1down.vx chain and then uses
; vslidedown.vi by 8 to move them into the low eight lanes.
; NOTE(review): the scalar-packing configurations still slide in values for the
; don't-care high elements (zero, or extra packh/pack/packw results when zbkb
; is enabled) - presumably a missed optimization; confirm before relying on it.
define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_high_half:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
; RV32-ONLY-NEXT: lbu a5, 44(a0)
; RV32-ONLY-NEXT: lbu a6, 55(a0)
; RV32-ONLY-NEXT: lbu a7, 623(a0)
; RV32-ONLY-NEXT: lbu a0, 75(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_high_half:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: lbu a3, 22(a0)
; RV32VB-NEXT: lbu a4, 31(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: lbu a2, 55(a0)
; RV32VB-NEXT: lbu a3, 44(a0)
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a0, 75(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: or a0, a2, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_high_half:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 22(a0)
; RV32VB-PACK-NEXT: lbu a3, 31(a0)
; RV32VB-PACK-NEXT: lbu a4, 44(a0)
; RV32VB-PACK-NEXT: lbu a5, 55(a0)
; RV32VB-PACK-NEXT: lbu a6, 623(a0)
; RV32VB-PACK-NEXT: lbu a0, 75(a0)
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a4, a5
; RV32VB-PACK-NEXT: packh a0, a6, a0
; RV32VB-PACK-NEXT: pack a0, a2, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: packh a0, a0, a0
; RV32VB-PACK-NEXT: pack a0, a0, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
; RV64V-ONLY-NEXT: lbu a5, 44(a0)
; RV64V-ONLY-NEXT: lbu a6, 55(a0)
; RV64V-ONLY-NEXT: lbu a7, 623(a0)
; RV64V-ONLY-NEXT: lbu a0, 75(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_high_half:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: lbu a3, 22(a0)
; RVA22U64-NEXT: lbu a4, 31(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: or a1, a1, a3
; RVA22U64-NEXT: lbu a3, 55(a0)
; RVA22U64-NEXT: lbu a4, 623(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a0, 75(a0)
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a0
; RVA22U64-NEXT: vslide1down.vx v8, v8, zero
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_high_half:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: packh a6, a1, a2
; RVA22U64-PACK-NEXT: lbu a2, 22(a0)
; RVA22U64-PACK-NEXT: lbu a3, 31(a0)
; RVA22U64-PACK-NEXT: lbu a4, 44(a0)
; RVA22U64-PACK-NEXT: lbu a5, 55(a0)
; RVA22U64-PACK-NEXT: lbu a1, 623(a0)
; RVA22U64-PACK-NEXT: lbu a0, 75(a0)
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packw a2, a6, a2
; RVA22U64-PACK-NEXT: packh a3, a4, a5
; RVA22U64-PACK-NEXT: packh a0, a1, a0
; RVA22U64-PACK-NEXT: packw a0, a3, a0
; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
; RVA22U64-PACK-NEXT: packh a0, a0, a0
; RVA22U64-PACK-NEXT: packw a0, a0, a0
; RVA22U64-PACK-NEXT: pack a0, a0, a0
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
; RV64ZVE32-NEXT: lbu a5, 44(a0)
; RV64ZVE32-NEXT: lbu a6, 55(a0)
; RV64ZVE32-NEXT: lbu a7, 623(a0)
; RV64ZVE32-NEXT: lbu a0, 75(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32-NEXT: ret
; Byte addresses for lanes 1 through 7; lane 0 is loaded directly from %p.
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
; The insert chain stops at lane 7, so lanes 8 through 15 stay poison.
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
ret <16 x i8> %v8
}
define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 31(a0)
; RV32-ONLY-NEXT: lbu a2, 44(a0)
; RV32-ONLY-NEXT: lbu a3, 55(a0)
; RV32-ONLY-NEXT: lbu a4, 623(a0)
; RV32-ONLY-NEXT: lbu a5, 75(a0)
; RV32-ONLY-NEXT: lbu a6, 82(a0)
; RV32-ONLY-NEXT: lbu a7, 93(a0)
; RV32-ONLY-NEXT: lbu t0, 105(a0)
; RV32-ONLY-NEXT: lbu a0, 161(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV32-ONLY-NEXT: vmv.v.x v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: li a0, 255
; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, a0
; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 4
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_edges:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 31(a0)
; RV32VB-NEXT: lbu a2, 55(a0)
; RV32VB-NEXT: lbu a3, 44(a0)
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a5, 75(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a5, a5, 24
; RV32VB-NEXT: lbu a3, 93(a0)
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: or a2, a2, a4
; RV32VB-NEXT: lbu a4, 82(a0)
; RV32VB-NEXT: slli a3, a3, 8
; RV32VB-NEXT: lbu a5, 105(a0)
; RV32VB-NEXT: lbu a0, 161(a0)
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: slli a1, a1, 24
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a5
; RV32VB-NEXT: or a0, a3, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 31(a0)
; RV32VB-PACK-NEXT: lbu a2, 44(a0)
; RV32VB-PACK-NEXT: lbu a3, 55(a0)
; RV32VB-PACK-NEXT: lbu a4, 623(a0)
; RV32VB-PACK-NEXT: lbu a5, 75(a0)
; RV32VB-PACK-NEXT: packh a1, a0, a1
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a4, a5
; RV32VB-PACK-NEXT: lbu a4, 82(a0)
; RV32VB-PACK-NEXT: lbu a5, 93(a0)
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: lbu a3, 105(a0)
; RV32VB-PACK-NEXT: lbu a0, 161(a0)
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: packh a5, a0, a0
; RV32VB-PACK-NEXT: pack a1, a5, a1
; RV32VB-PACK-NEXT: packh a0, a3, a0
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: pack a0, a5, a5
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 31(a0)
; RV64V-ONLY-NEXT: lbu a2, 44(a0)
; RV64V-ONLY-NEXT: lbu a3, 55(a0)
; RV64V-ONLY-NEXT: lbu a4, 623(a0)
; RV64V-ONLY-NEXT: lbu a5, 75(a0)
; RV64V-ONLY-NEXT: lbu a6, 82(a0)
; RV64V-ONLY-NEXT: lbu a7, 93(a0)
; RV64V-ONLY-NEXT: lbu t0, 105(a0)
; RV64V-ONLY-NEXT: lbu a0, 161(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV64V-ONLY-NEXT: vmv.v.x v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: li a0, 255
; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, a0
; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 4
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 44(a0)
; RVA22U64-NEXT: lbu a2, 55(a0)
; RVA22U64-NEXT: lbu a3, 31(a0)
; RVA22U64-NEXT: lbu a4, 623(a0)
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: slli a2, a2, 40
; RVA22U64-NEXT: lbu a5, 75(a0)
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: slli a3, a3, 24
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a1, a1, a4
; RVA22U64-NEXT: add.uw a1, a3, a1
; RVA22U64-NEXT: lbu a2, 93(a0)
; RVA22U64-NEXT: lbu a3, 82(a0)
; RVA22U64-NEXT: lbu a4, 105(a0)
; RVA22U64-NEXT: lbu a0, 161(a0)
; RVA22U64-NEXT: slli a2, a2, 8
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 16
; RVA22U64-NEXT: slli a0, a0, 24
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 31(a0)
; RVA22U64-PACK-NEXT: lbu a2, 44(a0)
; RVA22U64-PACK-NEXT: lbu a3, 55(a0)
; RVA22U64-PACK-NEXT: lbu a4, 623(a0)
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
; RVA22U64-PACK-NEXT: packh a6, a0, a1
; RVA22U64-PACK-NEXT: packh a1, a0, a0
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packh a3, a4, a5
; RVA22U64-PACK-NEXT: packw a7, a2, a3
; RVA22U64-PACK-NEXT: lbu a3, 82(a0)
; RVA22U64-PACK-NEXT: lbu a4, 93(a0)
; RVA22U64-PACK-NEXT: lbu a5, 105(a0)
; RVA22U64-PACK-NEXT: lbu a0, 161(a0)
; RVA22U64-PACK-NEXT: packw a2, a1, a6
; RVA22U64-PACK-NEXT: pack a2, a2, a7
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: packh a0, a5, a0
; RVA22U64-PACK-NEXT: packw a0, a3, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a2
; RVA22U64-PACK-NEXT: packw a1, a1, a1
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 31(a0)
; RV64ZVE32-NEXT: lbu a2, 44(a0)
; RV64ZVE32-NEXT: lbu a3, 55(a0)
; RV64ZVE32-NEXT: lbu a4, 623(a0)
; RV64ZVE32-NEXT: lbu a5, 75(a0)
; RV64ZVE32-NEXT: lbu a6, 82(a0)
; RV64ZVE32-NEXT: lbu a7, 93(a0)
; RV64ZVE32-NEXT: lbu t0, 105(a0)
; RV64ZVE32-NEXT: lbu a0, 161(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5
; RV64ZVE32-NEXT: vmv.v.x v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: li a0, 255
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a0
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
ret <16 x i8> %v12
}
; Builds a <16 x i8> from byte loads at non-contiguous offsets of %p, with
; lanes 2, 3, 6, 10, 11 and 13 explicitly set to undef. The expected output
; only loads offsets 0, 1, 44, 55, 75, 82, 93, 124, 144 and 154, i.e. exactly
; the values that feed a defined lane.
; The CHECK lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 44(a0)
; RV32-ONLY-NEXT: lbu a4, 55(a0)
; RV32-ONLY-NEXT: lbu a5, 75(a0)
; RV32-ONLY-NEXT: lbu a6, 82(a0)
; RV32-ONLY-NEXT: lbu a7, 93(a0)
; RV32-ONLY-NEXT: lbu t0, 124(a0)
; RV32-ONLY-NEXT: lbu t1, 144(a0)
; RV32-ONLY-NEXT: lbu a0, 154(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV32-ONLY-NEXT: vmv.v.x v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t1
; RV32-ONLY-NEXT: li a1, 255
; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, a1
; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: lbu a3, 55(a0)
; RV32VB-NEXT: lbu a4, 44(a0)
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: lbu a2, 75(a0)
; RV32VB-NEXT: slli a3, a3, 8
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: lbu a4, 93(a0)
; RV32VB-NEXT: slli a2, a2, 24
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: lbu a3, 82(a0)
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: lbu a5, 144(a0)
; RV32VB-NEXT: lbu a6, 154(a0)
; RV32VB-NEXT: or a3, a3, a4
; RV32VB-NEXT: lbu a0, 124(a0)
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli a6, a6, 24
; RV32VB-NEXT: or a4, a6, a5
; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 44(a0)
; RV32VB-PACK-NEXT: lbu a4, 55(a0)
; RV32VB-PACK-NEXT: lbu a5, 75(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
; RV32VB-PACK-NEXT: packh a3, a0, a5
; RV32VB-PACK-NEXT: lbu a4, 82(a0)
; RV32VB-PACK-NEXT: lbu a5, 93(a0)
; RV32VB-PACK-NEXT: lbu a6, 144(a0)
; RV32VB-PACK-NEXT: lbu a7, 154(a0)
; RV32VB-PACK-NEXT: lbu a0, 124(a0)
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a4, a5
; RV32VB-PACK-NEXT: packh a4, a6, a7
; RV32VB-PACK-NEXT: packh a0, a0, a0
; RV32VB-PACK-NEXT: pack a0, a0, a4
; RV32VB-PACK-NEXT: packh a4, a0, a0
; RV32VB-PACK-NEXT: pack a1, a1, a4
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: pack a1, a3, a4
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 44(a0)
; RV64V-ONLY-NEXT: lbu a4, 55(a0)
; RV64V-ONLY-NEXT: lbu a5, 75(a0)
; RV64V-ONLY-NEXT: lbu a6, 82(a0)
; RV64V-ONLY-NEXT: lbu a7, 93(a0)
; RV64V-ONLY-NEXT: lbu t0, 124(a0)
; RV64V-ONLY-NEXT: lbu t1, 144(a0)
; RV64V-ONLY-NEXT: lbu a0, 154(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV64V-ONLY-NEXT: vmv.v.x v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t1
; RV64V-ONLY-NEXT: li a1, 255
; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, a1
; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: lbu a3, 44(a0)
; RVA22U64-NEXT: lbu a4, 55(a0)
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 75(a0)
; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: slli a2, a2, 56
; RVA22U64-NEXT: lbu a4, 93(a0)
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 82(a0)
; RVA22U64-NEXT: slli a4, a4, 8
; RVA22U64-NEXT: lbu a3, 144(a0)
; RVA22U64-NEXT: lbu a5, 154(a0)
; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: lbu a0, 124(a0)
; RVA22U64-NEXT: slli a3, a3, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a3, a3, a5
; RVA22U64-NEXT: slli a0, a0, 32
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
; RVA22U64-PACK-NEXT: packh a1, a1, a2
; RVA22U64-PACK-NEXT: packh a2, a3, a4
; RVA22U64-PACK-NEXT: packh a3, a0, a5
; RVA22U64-PACK-NEXT: packw a6, a2, a3
; RVA22U64-PACK-NEXT: packh a3, a0, a0
; RVA22U64-PACK-NEXT: packw a7, a1, a3
; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
; RVA22U64-PACK-NEXT: lbu a5, 93(a0)
; RVA22U64-PACK-NEXT: lbu a2, 144(a0)
; RVA22U64-PACK-NEXT: lbu a1, 154(a0)
; RVA22U64-PACK-NEXT: lbu a0, 124(a0)
; RVA22U64-PACK-NEXT: pack a6, a7, a6
; RVA22U64-PACK-NEXT: packh a4, a4, a5
; RVA22U64-PACK-NEXT: packh a1, a2, a1
; RVA22U64-PACK-NEXT: packh a0, a0, a0
; RVA22U64-PACK-NEXT: packw a0, a0, a1
; RVA22U64-PACK-NEXT: packw a1, a4, a3
; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 44(a0)
; RV64ZVE32-NEXT: lbu a4, 55(a0)
; RV64ZVE32-NEXT: lbu a5, 75(a0)
; RV64ZVE32-NEXT: lbu a6, 82(a0)
; RV64ZVE32-NEXT: lbu a7, 93(a0)
; RV64ZVE32-NEXT: lbu t0, 124(a0)
; RV64ZVE32-NEXT: lbu t1, 144(a0)
; RV64ZVE32-NEXT: lbu a0, 154(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5
; RV64ZVE32-NEXT: vmv.v.x v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t1
; RV64ZVE32-NEXT: li a1, 255
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
; Addresses and loads exist only for lanes that receive a loaded value; the
; remaining lanes (2, 3, 6, 10, 11, 13) are filled with undef below.
%p2 = getelementptr i8, ptr %p, i32 1
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p13 = getelementptr i8, ptr %p, i32 124
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld13 = load i8, ptr %p13
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 undef, i32 2
%v4 = insertelement <16 x i8> %v3, i8 undef, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 undef, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 undef, i32 10
%v12 = insertelement <16 x i8> %v11, i8 undef, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 undef, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
; Builds an <8 x i8> from eight scalar i8 arguments inserted at lanes 0-7.
; Expected lowering per RUN configuration (see the CHECK lines below):
;  - RV32-ONLY / RV64V-ONLY / RV64ZVE32: two 4-element vslide1down chains
;    merged by a masked vslidedown (mask constant 15).
;  - RV32VB / RVA22U64: bytes are combined in scalar registers with
;    andi/slli/or and moved into the vector as 32- or 64-bit elements.
;  - RV32VB-PACK / RVA22U64-PACK: same idea, using packh/pack/packw.
; The CHECK lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) {
; RV32-ONLY-LABEL: buildvec_v8i8_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a3
; RV32-ONLY-NEXT: vmv.v.x v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vmv.v.i v0, 15
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v8i8_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a7, a7, 24
; RV32VB-NEXT: andi a6, a6, 255
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: or a6, a7, a6
; RV32VB-NEXT: andi a4, a4, 255
; RV32VB-NEXT: andi a5, a5, 255
; RV32VB-NEXT: slli a5, a5, 8
; RV32VB-NEXT: or a4, a4, a5
; RV32VB-NEXT: or a4, a4, a6
; RV32VB-NEXT: slli a3, a3, 24
; RV32VB-NEXT: andi a2, a2, 255
; RV32VB-NEXT: slli a2, a2, 16
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: andi a0, a0, 255
; RV32VB-NEXT: andi a1, a1, 255
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: or a0, a0, a2
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a4
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v8i8_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: packh a6, a6, a7
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: pack a4, a4, a6
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a0, a0, a1
; RV32VB-PACK-NEXT: pack a0, a0, a2
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a4
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v8i8_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a3
; RV64V-ONLY-NEXT: vmv.v.x v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vmv.v.i v0, 15
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v8i8_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: andi a4, a4, 255
; RVA22U64-NEXT: slli a4, a4, 32
; RVA22U64-NEXT: andi a5, a5, 255
; RVA22U64-NEXT: slli a5, a5, 40
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: slli a7, a7, 56
; RVA22U64-NEXT: andi a5, a6, 255
; RVA22U64-NEXT: slli a5, a5, 48
; RVA22U64-NEXT: or a5, a7, a5
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: andi a2, a2, 255
; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: andi a3, a3, 255
; RVA22U64-NEXT: slli a3, a3, 24
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: andi a0, a0, 255
; RVA22U64-NEXT: andi a1, a1, 255
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v8i8_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packh a6, a6, a7
; RVA22U64-PACK-NEXT: packh a4, a4, a5
; RVA22U64-PACK-NEXT: packw a4, a4, a6
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packh a0, a0, a1
; RVA22U64-PACK-NEXT: packw a0, a0, a2
; RVA22U64-PACK-NEXT: pack a0, a0, a4
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v8i8_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a3
; RV64ZVE32-NEXT: vmv.v.x v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vmv.v.i v0, 15
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
%v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
%v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2
%v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3
%v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4
%v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5
%v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6
%v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7
ret <8 x i8> %v8
}
; Builds a <6 x i8> (non-power-of-two element count) from six scalar i8
; arguments inserted at lanes 0-5.
; Expected lowering per RUN configuration (see the CHECK lines below):
;  - RV32-ONLY / RV64V-ONLY / RV64ZVE32: a VL=8 vslide1down chain over the six
;    arguments followed by a vslidedown by 2.
;  - RV32VB / RVA22U64: scalar andi/slli/or packing; only a4 and a5 contribute
;    to the upper half of the result.
;  - RV32VB-PACK / RVA22U64-PACK: packh/pack/packw packing. The
;    `packh aN, a0, a0` in these sequences presumably stands in for the two
;    padding bytes beyond lane 5 -- NOTE(review): confirm against the lowering.
; The CHECK lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <6 x i8> @buildvec_v6i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) {
; RV32-ONLY-LABEL: buildvec_v6i8_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v6i8_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a3, a3, 24
; RV32VB-NEXT: andi a2, a2, 255
; RV32VB-NEXT: slli a2, a2, 16
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: andi a0, a0, 255
; RV32VB-NEXT: andi a1, a1, 255
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: or a0, a0, a2
; RV32VB-NEXT: andi a1, a4, 255
; RV32VB-NEXT: andi a2, a5, 255
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: or a1, a1, a2
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v6i8_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a0, a0, a1
; RV32VB-PACK-NEXT: pack a0, a0, a2
; RV32VB-PACK-NEXT: packh a1, a4, a5
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: packh a0, a0, a0
; RV32VB-PACK-NEXT: pack a0, a1, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v6i8_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v6i8_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: andi a2, a2, 255
; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: andi a3, a3, 255
; RVA22U64-NEXT: slli a3, a3, 24
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: andi a0, a0, 255
; RVA22U64-NEXT: andi a1, a1, 255
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: andi a1, a4, 255
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: andi a2, a5, 255
; RVA22U64-NEXT: slli a2, a2, 40
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v6i8_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packh a0, a0, a1
; RVA22U64-PACK-NEXT: packw a0, a0, a2
; RVA22U64-PACK-NEXT: packh a1, a4, a5
; RVA22U64-PACK-NEXT: packh a2, a0, a0
; RVA22U64-PACK-NEXT: packw a1, a1, a2
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v6i8_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: ret
%v1 = insertelement <6 x i8> poison, i8 %e1, i32 0
%v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1
%v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2
%v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3
%v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4
%v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5
ret <6 x i8> %v6
}
; Builds a <4 x i16> from four scalar i16 arguments inserted at lanes 0-3.
; Expected lowering per RUN configuration (see the CHECK lines below):
;  - RV32-ONLY / RV64V-ONLY / RV64ZVE32: a single e16 vslide1down chain.
;  - RV32VB / RVA22U64: halfwords are combined in scalar registers with
;    zext.h/slli/or and moved into the vector as 32- or 64-bit elements.
;  - RV32VB-PACK / RVA22U64-PACK: the same packing done with pack/packw.
; The CHECK lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
; RV32-ONLY-LABEL: buildvec_v4i16_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v4i16_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: zext.h a2, a2
; RV32VB-NEXT: or a2, a2, a3
; RV32VB-NEXT: slli a1, a1, 16
; RV32VB-NEXT: zext.h a0, a0
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v4i16_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: pack a0, a0, a1
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v4i16_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v4i16_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a3, a3, 48
; RVA22U64-NEXT: zext.h a2, a2
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: zext.h a0, a0
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: slli a1, a1, 16
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v4i16_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packw a2, a2, a3
; RVA22U64-PACK-NEXT: packw a0, a0, a1
; RVA22U64-PACK-NEXT: pack a0, a0, a2
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v4i16_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i16> poison, i16 %e1, i32 0
%v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1
%v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2
%v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3
ret <4 x i16> %v4
}
; Builds a <2 x i32> from two scalar i32 arguments inserted at lanes 0 and 1.
; Expected lowering per RUN configuration (see the CHECK lines below):
;  - All RV32 configurations share the RV32 prefix: vmv.v.x + vslide1down at
;    e32, with no scalar packing.
;  - RV64V-ONLY / RV64ZVE32: the same vector sequence (mf2 and m1 register
;    grouping respectively).
;  - RVA22U64: the two words are merged with slli + add.uw and written with a
;    single e64 vmv.s.x.
;  - RVA22U64-PACK: the merge is a single pack instruction.
; The CHECK lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <2 x i32> @buildvec_v2i32_pack(i32 %e1, i32 %e2) {
; RV32-LABEL: buildvec_v2i32_pack:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v2i32_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v2i32_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v2i32_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v2i32_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: ret
%v1 = insertelement <2 x i32> poison, i32 %e1, i32 0
%v2 = insertelement <2 x i32> %v1, i32 %e2, i32 1
ret <2 x i32> %v2
}
; Builds a <1 x i16> from a single scalar i16. Every RUN configuration shares
; the CHECK prefix here: the expected output is one e16 vmv.s.x with VL=1 and
; no scalar packing instructions.
define <1 x i16> @buildvec_v1i16_pack(i16 %e1) {
; CHECK-LABEL: buildvec_v1i16_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i16> poison, i16 %e1, i32 0
ret <1 x i16> %v1
}
; Builds a <1 x i32> from a single scalar i32. Every RUN configuration shares
; the CHECK prefix here: the expected output is one e32 vmv.s.x with VL=1 and
; no scalar packing instructions.
define <1 x i32> @buildvec_v1i32_pack(i32 %e1) {
; CHECK-LABEL: buildvec_v1i32_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i32> poison, i32 %e1, i32 0
ret <1 x i32> %v1
}