; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes
; ===== Legal Vector Types =====
; Generic two-input shuffle of 64-bit byte vectors: the mask picks lanes 1,3,5,7
; of %a followed by lanes 0,2,4,7 of %b (mask indices 8,10,12,15). Both
; selectors are expected to pack the two inputs into one q register and do a
; single-register TBL with a constant-pool index vector.
define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: adrp x8, .LCPI0_0
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI0_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
ret <8 x i8> %c
}
; Irregular two-input shuffle of 128-bit byte vectors: the first twelve result
; lanes come from %a (lanes 1,3,5,7,8,10,12,15,2,4,6,8), the last four from %b
; (lanes 9,14,15,15 via mask indices 25,30,31,31). Expected lowering is a
; two-register TBL with the index vector loaded from the constant pool.
define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v16i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI1_0
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
ret <16 x i8> %c
}
; Odd-lane extraction: mask <1,3,5,7> yields a[1], a[3], b[1], b[3]. Both
; selectors are expected to match this to a single UZP2 on .4h lanes.
define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shufflevector_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i16> %c
}
; Irregular two-input shuffle of 128-bit halfword vectors: lanes 1,3,5,7 of %a
; followed by lanes 0,2,4,7 of %b (mask indices 8,10,12,15). Expected lowering
; is a byte-wise two-register TBL driven by a constant-pool index vector.
define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v8i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI3_0
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI3_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
ret <8 x i16> %c
}
; Mask <1,3> yields a[1], b[1], i.e. the high lane of each input. Both selectors
; are expected to emit a single ZIP2 on .2s lanes.
define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: shufflevector_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i32> %c
}
; Odd-lane extraction on 128-bit word vectors: mask <1,3,5,7> yields a[1], a[3],
; b[1], b[3]. Both selectors are expected to emit a single UZP2 on .4s lanes.
define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i32> %c
}
; Mask <1,3> yields a[1], b[1] for doubleword lanes. Both selectors are expected
; to emit a single ZIP2 on .2d lanes.
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %c
}
; ===== Legal Vector Types with Zero Masks =====
; All-zero mask: every result lane is a[0] and %b is never read. Both selectors
; are expected to emit a lane-0 DUP on .8b.
define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: shufflevector_v8i8_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8b, v0.b[0]
; CHECK-NEXT: ret
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i8> %c
}
; All-zero mask on a 128-bit byte vector: splat of a[0], %b unused. Expected
; lowering is a single lane-0 DUP on .16b.
define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_v16i8_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.16b, v0.b[0]
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %c
}
; All-zero mask on a 64-bit halfword vector: splat of a[0], %b unused. Expected
; lowering is a single lane-0 DUP on .4h.
define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shufflevector_v4i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i16> %c
}
; All-zero mask on a 128-bit halfword vector: splat of a[0], %b unused. Expected
; lowering is a single lane-0 DUP on .8h.
define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_v8i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %c
}
; All-zero mask on a 64-bit word vector: splat of a[0], %b unused. Expected
; lowering is a single lane-0 DUP on .2s.
define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: shufflevector_v2i32_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
%c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i32> %c
}
; All-zero mask on a 128-bit word vector: splat of a[0], %b unused. Expected
; lowering is a single lane-0 DUP on .4s.
define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %c
}
; All-zero mask on a 128-bit doubleword vector: splat of a[0], %b unused.
; Expected lowering is a single lane-0 DUP on .2d.
define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %c
}
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
; Two-lane i1 shuffle: mask <0,3> keeps a[0] and takes b[1]. The expected code
; treats the operands as .2s vectors and inserts lane 1 of %b into lane 1 of %a.
; The warning expectations at the top of the file record that GlobalISel falls
; back to the default selector for this function, so one shared prefix covers
; both runs.
define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
; CHECK-LABEL: shufflevector_v2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.s[1], v1.s[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i1> %c
}
; Sub-64-bit byte vector: mask <1,2,4,7> yields a[1], a[2], b[0], b[3], and the
; result is bitcast to i32 so it is returned in w0.
; SelectionDAG works on the operands as .4h vectors (EXT/ZIP1/EXT), then narrows
; to bytes with UZP1; note the expected output still adjusts sp by 16 without
; any visible stack access.
; GlobalISel rebuilds each operand as packed bytes lane by lane, concatenates
; them and uses a single-register TBL with a constant-pool index vector.
define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov h2, v0.h[1]
; CHECK-GI-NEXT: mov h3, v1.h[1]
; CHECK-GI-NEXT: mov h4, v0.h[2]
; CHECK-GI-NEXT: mov h5, v0.h[3]
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov h2, v1.h[2]
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov h3, v1.h[3]
; CHECK-GI-NEXT: mov v0.b[1], w8
; CHECK-GI-NEXT: mov v1.b[1], w9
; CHECK-GI-NEXT: fmov w8, s4
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: mov v0.b[2], w8
; CHECK-GI-NEXT: mov v1.b[2], w9
; CHECK-GI-NEXT: fmov w8, s5
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov v0.b[3], w8
; CHECK-GI-NEXT: mov v1.b[3], w9
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
%d = bitcast <4 x i8> %c to i32
ret i32 %d
}
; 256-bit byte vector (two q registers per operand). The mask is eight groups of
; four lanes: a[k] followed by three copies of b[0] (mask index 32), for
; k = 0..7. Only the low 128 bits of each operand are referenced, so both
; selectors are expected to emit two two-register TBLs over the same source
; pair, one per result half, each with its own constant-pool index vector.
define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v32i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: adrp x9, .LCPI16_1
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v32i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
; CHECK-GI-NEXT: adrp x9, .LCPI16_0
; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
ret <32 x i8> %c
}
; Sub-64-bit halfword vector: mask <1,2> yields a[1], b[0], and the result is
; bitcast to i32 so it is returned in w0.
; SelectionDAG handles the operands as .2s vectors, uses EXT to line up the two
; wanted lanes, then packs them through a stack slot (two STRH, one LDR).
; GlobalISel compacts each operand to packed halfwords, concatenates them and
; uses a single-register TBL with a constant-pool index vector.
define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov w9, v1.s[1]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: mov v1.h[1], w9
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
%d = bitcast <2 x i16> %c to i32
ret i32 %d
}
; 256-bit halfword vector (two q registers per operand). The mask is four groups
; of four lanes: one lane of %a followed by three copies of b[0] (mask index
; 16); the %a lanes used are 0, 1, 1, 3. Both selectors are expected to emit two
; two-register TBLs over the same source pair, one per result half.
; NOTE(review): source lane 1 appears twice (result lanes 4 and 8), whereas the
; sibling v32i8 test uses consecutive lanes; lane 2 may have been intended for
; the third group. Confirm before changing, since the constant-pool contents
; are not verified by the expectations below.
define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v16i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT: adrp x8, .LCPI18_0
; CHECK-SD-NEXT: adrp x9, .LCPI18_1
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov v3.16b, v0.16b
; CHECK-GI-NEXT: adrp x8, .LCPI18_1
; CHECK-GI-NEXT: adrp x9, .LCPI18_0
; CHECK-GI-NEXT: mov v4.16b, v2.16b
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
ret <16 x i16> %c
}
; Single-element vectors: mask <1> selects the only lane of %b, so the shuffle
; reduces to returning %b. Expected code is a register copy from d1 to d0.
define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: shufflevector_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
%c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
ret <1 x i32> %c
}
; 256-bit word vector (two q registers per operand). Mask <1,3,5,7,8,10,12,15>
; yields the odd lanes of %a in the low result half and b[0], b[2], b[4], b[7]
; in the high half. The low half is expected to be a UZP2 in both selectors.
; For the high half SelectionDAG uses UZP1 (even lanes of %b) and then patches
; lane 3 with b[7], while GlobalISel uses a two-register TBL with a
; constant-pool index vector.
define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-SD-LABEL: shufflevector_v8i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: mov v2.s[3], v3.s[3]
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v8i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
ret <8 x i32> %c
}
; 256-bit doubleword vector (two q registers per operand). Mask <1,3,5,7> yields
; a[1], a[3], b[1], b[3], i.e. the high doubleword of each of the four input
; registers. Both selectors are expected to emit two ZIP2 instructions;
; SelectionDAG additionally needs a register copy to place the second result.
define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-SD-LABEL: shufflevector_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d
; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64> %c
}
; ===== Smaller/Larger Width Vectors with Zero Masks =====
; All-zero mask on a two-lane i1 vector: splat of a[0], %b unused. The expected
; code handles the operand as a .2s vector and emits a lane-0 DUP. The warning
; expectations at the top of the file record that GlobalISel falls back to the
; default selector for this function.
define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
; CHECK-LABEL: shufflevector_v2i1_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.2s, v0.s[0]
; CHECK-NEXT: ret
%c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i1> %c
}
; All-zero mask on a sub-64-bit byte vector: splat of a[0], bitcast to i32 and
; returned in w0; %b is unused.
; SelectionDAG splats halfword lane 0 and narrows to bytes with UZP1; the
; expected output still adjusts sp by 16 without any visible stack access.
; GlobalISel first compacts %a to packed bytes lane by lane, then splats byte
; lane 0 with DUP.
define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov h2, v0.h[2]
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov h1, v0.h[3]
; CHECK-GI-NEXT: mov v0.b[1], w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov v0.b[2], w8
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.b[3], w8
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%d = bitcast <4 x i8> %c to i32
ret i32 %d
}
; All-zero mask on a 256-bit byte vector: splat of a[0], %b unused. Expected
; lowering is one lane-0 DUP on .16b plus a register copy for the second result
; register.
define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
; CHECK-LABEL: shufflevector_v32i8_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.16b, v0.b[0]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ret
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %c
}
; All-zero mask on a sub-64-bit halfword vector: splat of a[0], bitcast to i32
; and returned in w0; %b is unused.
; SelectionDAG splats word lane 0, then packs the two halfwords through a stack
; slot (two STRH, one LDR).
; GlobalISel compacts %a to packed halfwords and splats halfword lane 0 with
; DUP.
define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: dup v0.4h, v0.h[0]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
%d = bitcast <2 x i16> %c to i32
ret i32 %d
}
; All-zero mask on a 256-bit halfword vector: splat of a[0], %b unused. Expected
; lowering is one lane-0 DUP on .8h plus a register copy for the second result
; register.
define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){
; CHECK-LABEL: shufflevector_v16i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ret
%c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %c
}
; Single-element vectors with mask <0>: the result is %a unchanged, so the
; expected body is just a return with no data movement.
define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: shufflevector_v1i32_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 0>
ret <1 x i32> %c
}
; All-zero mask on a 256-bit word vector: splat of a[0], %b unused. Expected
; lowering is one lane-0 DUP on .4s plus a register copy for the second result
; register.
define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: shufflevector_v8i32_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ret
%c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i32> %c
}
; All-zero mask on a 256-bit doubleword vector: splat of a[0], %b unused.
; Expected lowering is one lane-0 DUP on .2d plus a register copy for the second
; result register.
define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: shufflevector_v4i64_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.2d, v0.d[0]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: ret
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %c
}
; ===== Vectors with Non-Pow 2 Widths =====
; Three-element byte vector: mask <1,2,4> yields a[1], a[2], b[1]. The expected
; code shows the elements arriving and leaving in general-purpose registers
; (%a in w0-w2, %b in w3-w5, result in w0-w2).
; SelectionDAG resolves the shuffle entirely with GPR moves.
; GlobalISel inserts the scalars into vector registers, performs a
; single-register TBL with a constant-pool index vector, then extracts the
; three result bytes back into w0-w2.
define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v3i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w0, w1
; CHECK-SD-NEXT: mov w1, w2
; CHECK-SD-NEXT: mov w2, w4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v3i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: fmov s1, w3
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: mov v0.b[1], w1
; CHECK-GI-NEXT: mov v1.b[1], w4
; CHECK-GI-NEXT: mov v0.b[2], w2
; CHECK-GI-NEXT: mov v1.b[2], w5
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: mov b1, v0.b[1]
; CHECK-GI-NEXT: mov b2, v0.b[2]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: fmov w1, s1
; CHECK-GI-NEXT: fmov w2, s2
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 1, i32 2, i32 4>
ret <3 x i8> %c
}
; Seven-element byte vector, so mask indices 7..13 address %b: mask
; <1,3,5,7,8,10,12> yields a[1], a[3], a[5], b[0], b[1], b[3], b[5]. Both
; selectors are expected to pack the two inputs into one q register and do a
; single-register TBL with a constant-pool index vector, as for the v8i8 case.
define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v7i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: adrp x8, .LCPI31_0
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI31_0]
; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v7i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI31_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI31_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
ret <7 x i8> %c
}
; Three-element halfword vector held in a 64-bit register: mask <1,2,4> yields
; a[1], a[2], b[1].
; SelectionDAG builds the result from a ZIP1/ZIP2 pair on .4h lanes.
; GlobalISel concatenates the inputs and uses a single-register TBL with a
; constant-pool index vector.
define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v3i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: zip1 v1.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: zip2 v0.4h, v1.4h, v0.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v3i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: adrp x8, .LCPI32_0
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
ret <3 x i16> %c
}
; Seven-element halfword vector held in a 128-bit register, so mask indices
; 7..13 address %b: mask <1,3,5,7,8,10,12> yields a[1], a[3], a[5], b[0], b[1],
; b[3], b[5]. Both selectors are expected to use a byte-wise two-register TBL
; with a constant-pool index vector.
define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v7i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI33_0
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v7i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI33_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
ret <7 x i16> %c
}
; Three-element word vector held in a 128-bit register: mask <1,2,4> yields
; a[1], a[2], b[1].
; SelectionDAG builds the result from a ZIP1/ZIP2 pair on .4s lanes.
; GlobalISel uses a two-register TBL with a constant-pool index vector.
define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
; CHECK-SD-LABEL: shufflevector_v3i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: zip1 v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: zip2 v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v3i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI34_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
ret <3 x i32> %c
}
; ===== Vectors with Non-Pow 2 Widths with Zero Masks =====
; All-zero mask on a three-element byte vector: every result element is a[0]
; and %b is unused. The expected code shows the elements arriving and leaving
; in w0-w2.
; SelectionDAG just copies w0 into w1 and w2.
; GlobalISel inserts the scalars into a vector register, splats byte lane 0
; with DUP, then extracts the three result bytes back into w0-w2.
define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v3i8_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w1, w0
; CHECK-SD-NEXT: mov w2, w0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v3i8_zeroes:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s0, w0
; CHECK-GI-NEXT: mov v0.b[1], w1
; CHECK-GI-NEXT: mov v0.b[2], w2
; CHECK-GI-NEXT: dup v0.8b, v0.b[0]
; CHECK-GI-NEXT: mov b1, v0.b[1]
; CHECK-GI-NEXT: mov b2, v0.b[2]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: fmov w1, s1
; CHECK-GI-NEXT: fmov w2, s2
; CHECK-GI-NEXT: ret
%c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0>
ret <3 x i8> %c
}
; All-zero mask on a seven-element byte vector: splat of a[0], %b unused.
; Expected lowering matches the v8i8 case, a lane-0 DUP on .8b.
define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
; CHECK-LABEL: shufflevector_v7i8_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8b, v0.b[0]
; CHECK-NEXT: ret
%c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <7 x i8> %c
}
; All-zero mask on a three-element halfword vector: splat of a[0], %b unused.
; Expected lowering matches the v4i16 case, a lane-0 DUP on .4h.
define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
; CHECK-LABEL: shufflevector_v3i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
%c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>
ret <3 x i16> %c
}
; All-zero mask on a seven-element halfword vector: splat of a[0], %b unused.
; Expected lowering matches the v8i16 case, a lane-0 DUP on .8h.
define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) {
; CHECK-LABEL: shufflevector_v7i16_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <7 x i16> %c
}
; All-zero mask on a three-element word vector: splat of a[0], %b unused.
; Expected lowering matches the v4i32 case, a lane-0 DUP on .4s.
define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) {
; CHECK-LABEL: shufflevector_v3i32_zeroes:
; CHECK: // %bb.0:
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 0, i32 0, i32 0>
ret <3 x i32> %c
}