; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; One-register table lookup producing <8 x i8>: passes %A and %B straight to
; the llvm.aarch64.neon.tbl1.v8i8 intrinsic. Both llc configurations are
; expected to emit a single tbl.8b with a one-register list.
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbl.8b v0, { v0 }, v1
; CHECK-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
ret <8 x i8> %tmp3
}
; One-register table lookup producing <16 x i8>: passes %A and %B straight to
; the llvm.aarch64.neon.tbl1.v16i8 intrinsic. Both llc configurations are
; expected to emit a single tbl.16b with a one-register list.
define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
ret <16 x i8> %tmp3
}
; Two-register table lookup producing <8 x i8> via the
; llvm.aarch64.neon.tbl2.v8i8 intrinsic (%A, %B as the register pair, %C as
; the last operand). Both selectors are expected to emit one tbl.8b over
; { v0, v1 }; the expectations differ only in the order of the register-tuple
; "kill" annotations, hence the separate SD and GI prefixes.
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
; CHECK-SD-LABEL: tbl2_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl2_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
ret <8 x i8> %tmp3
}
; Two-register table lookup producing <16 x i8> via the
; llvm.aarch64.neon.tbl2.v16i8 intrinsic. Both selectors are expected to emit
; one tbl.16b over { v0, v1 }; only the order of the "kill" annotations
; differs between the SD and GI expectations.
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-SD-LABEL: tbl2_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl2_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
ret <16 x i8> %tmp3
}
; Three-register table lookup producing <8 x i8> via the
; llvm.aarch64.neon.tbl3.v8i8 intrinsic. Both selectors are expected to emit
; one tbl.8b over { v0, v1, v2 } with v3 as the final operand; only the order
; of the "kill" annotations differs between the SD and GI expectations.
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-SD-LABEL: tbl3_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl3_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
; Three-register table lookup producing <16 x i8> via the
; llvm.aarch64.neon.tbl3.v16i8 intrinsic. Both selectors are expected to emit
; one tbl.16b over { v0, v1, v2 } with v3 as the final operand; only the
; order of the "kill" annotations differs between the SD and GI expectations.
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-SD-LABEL: tbl3_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl3_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
; Four-register table lookup producing <8 x i8> via the
; llvm.aarch64.neon.tbl4.v8i8 intrinsic. Both selectors are expected to emit
; one tbl.8b over { v0, v1, v2, v3 } with v4 as the final operand; only the
; order of the "kill" annotations differs between the SD and GI expectations.
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-SD-LABEL: tbl4_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl4_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
; Four-register table lookup producing <16 x i8> via the
; llvm.aarch64.neon.tbl4.v16i8 intrinsic. Both selectors are expected to emit
; one tbl.16b over { v0, v1, v2, v3 } with v4 as the final operand; only the
; order of the "kill" annotations differs between the SD and GI expectations.
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-SD-LABEL: tbl4_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbl4_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
; CHECK-SD-LABEL: .LCPI8_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-GI-LABEL: .LCPI8_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 13 // 0xd
; CHECK-GI-NEXT: .byte 14 // 0xe
; CHECK-GI-NEXT: .byte 15 // 0xf
; CHECK-GI-LABEL: .LCPI8_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; 64-bit variant of the "two tbl2 results combined by a shuffle" pattern.
; Two tbl2.v8i8 calls, over (%a, %b) and (%c, %d), use the same constant
; index vector <0, 4, 8, 12, -1, -1, -1, -1>; the shufflevector then keeps
; lanes 0-3 of %t1 and lanes 4-7 of %t2 (shuffle indices 12-15).
; Expected output:
;   * default selector (SD prefix): two tbl.8b sharing one constant-pool
;     mask, merged with a mov.s lane copy;
;   * -global-isel (GI prefix): two tbl.8b, a mov.d lane copy, then a
;     one-register tbl.16b driven by a second constant-pool mask.
; Neither configuration is expected to form a four-register tbl here.
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: adrp x8, .LCPI8_0
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
; CHECK-SD-NEXT: mov.s v0[1], v1[1]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI8_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
; CHECK-GI-NEXT: mov.d v0[1], v1[0]
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %s
}
; CHECK-SD-LABEL: .LCPI9_0:
; CHECK-SD-NEXT: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 32 // 0x20
; CHECK-SD-NEXT: .byte 36 // 0x24
; CHECK-SD-NEXT: .byte 40 // 0x28
; CHECK-SD-NEXT: .byte 44 // 0x2c
; CHECK-SD-NEXT: .byte 48 // 0x30
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 56 // 0x38
; CHECK-SD-NEXT: .byte 60 // 0x3c
;CHECK-GI-LABEL: .LCPI9_0:
;CHECK-GI: .byte 0 // 0x0
;CHECK-GI-NEXT: .byte 1 // 0x1
;CHECK-GI-NEXT: .byte 2 // 0x2
;CHECK-GI-NEXT: .byte 3 // 0x3
;CHECK-GI-NEXT: .byte 4 // 0x4
;CHECK-GI-NEXT: .byte 5 // 0x5
;CHECK-GI-NEXT: .byte 6 // 0x6
;CHECK-GI-NEXT: .byte 7 // 0x7
;CHECK-GI-NEXT: .byte 16 // 0x10
;CHECK-GI-NEXT: .byte 17 // 0x11
;CHECK-GI-NEXT: .byte 18 // 0x12
;CHECK-GI-NEXT: .byte 19 // 0x13
;CHECK-GI-NEXT: .byte 20 // 0x14
;CHECK-GI-NEXT: .byte 21 // 0x15
;CHECK-GI-NEXT: .byte 22 // 0x16
;CHECK-GI-NEXT: .byte 23 // 0x17
;CHECK-GI-LABEL: .LCPI9_1:
;CHECK-GI: .byte 0 // 0x0
;CHECK-GI-NEXT: .byte 4 // 0x4
;CHECK-GI-NEXT: .byte 8 // 0x8
;CHECK-GI-NEXT: .byte 12 // 0xc
;CHECK-GI-NEXT: .byte 16 // 0x10
;CHECK-GI-NEXT: .byte 20 // 0x14
;CHECK-GI-NEXT: .byte 24 // 0x18
;CHECK-GI-NEXT: .byte 28 // 0x1c
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
; Baseline for the tbl2 + tbl2 + shuffle -> tbl4 fold with constant masks.
; Both tbl2.v16i8 calls use the index vector <0, 4, ..., 28, -1 x 8>; the
; shufflevector concatenates lanes 0-7 of %t1 with lanes 0-7 of %t2 (shuffle
; indices 16-23).
; Expected output:
;   * default selector (SD prefix): a single four-register tbl.16b over
;     { v0, v1, v2, v3 } with the combined mask loaded from the constant pool;
;   * -global-isel (GI prefix): no fold - two two-register tbl.16b followed by
;     a third two-register tbl.16b that implements the shuffle.
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI9_0
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI9_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; CHECK-GI-LABEL: .LCPI10_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI10_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Variant of shuffled_tbl2_to_tbl4 where the mask of the FIRST tbl2 is not a
; compile-time constant: it is built with insertelement so that lanes 0-7 hold
; the runtime value %v and lanes 8-15 hold -1. The second tbl2 keeps the
; constant mask <0, 4, ..., 28, -1 x 8>, and the shuffle concatenates lanes
; 0-7 of %t1 with lanes 0-7 of %t2.
; Expected output:
;   * default selector (SD prefix): still a single four-register tbl.16b; its
;     mask is assembled lane by lane, lanes 0-7 from w0 and lanes 8-15 from
;     the immediates 32, 36, ..., 60;
;   * -global-isel (GI prefix): no fold - the mask is built with lane moves
;     and three two-register tbl.16b instructions are emitted.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s4, w0
; CHECK-SD-NEXT: mov w8, #32 // =0x20
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[1], w0
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[2], w0
; CHECK-SD-NEXT: mov.b v4[3], w0
; CHECK-SD-NEXT: mov.b v4[4], w0
; CHECK-SD-NEXT: mov.b v4[5], w0
; CHECK-SD-NEXT: mov.b v4[6], w0
; CHECK-SD-NEXT: mov.b v4[7], w0
; CHECK-SD-NEXT: mov.b v4[8], w8
; CHECK-SD-NEXT: mov w8, #36 // =0x24
; CHECK-SD-NEXT: mov.b v4[9], w8
; CHECK-SD-NEXT: mov w8, #40 // =0x28
; CHECK-SD-NEXT: mov.b v4[10], w8
; CHECK-SD-NEXT: mov w8, #44 // =0x2c
; CHECK-SD-NEXT: mov.b v4[11], w8
; CHECK-SD-NEXT: mov w8, #48 // =0x30
; CHECK-SD-NEXT: mov.b v4[12], w8
; CHECK-SD-NEXT: mov w8, #52 // =0x34
; CHECK-SD-NEXT: mov.b v4[13], w8
; CHECK-SD-NEXT: mov w8, #56 // =0x38
; CHECK-SD-NEXT: mov.b v4[14], w8
; CHECK-SD-NEXT: mov w8, #60 // =0x3c
; CHECK-SD-NEXT: mov.b v4[15], w8
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
; CHECK-GI-NEXT: mov.b v5[3], v4[0]
; CHECK-GI-NEXT: mov.b v5[4], v4[0]
; CHECK-GI-NEXT: mov.b v5[5], v4[0]
; CHECK-GI-NEXT: mov.b v5[6], v4[0]
; CHECK-GI-NEXT: mov.b v5[7], v4[0]
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI10_1
; CHECK-GI-NEXT: mov.b v5[8], v4[0]
; CHECK-GI-NEXT: mov.b v5[9], v4[0]
; CHECK-GI-NEXT: mov.b v5[10], v4[0]
; CHECK-GI-NEXT: mov.b v5[11], v4[0]
; CHECK-GI-NEXT: mov.b v5[12], v4[0]
; CHECK-GI-NEXT: mov.b v5[13], v4[0]
; CHECK-GI-NEXT: mov.b v5[14], v4[0]
; CHECK-GI-NEXT: mov.b v5[15], v4[0]
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI10_1]
; CHECK-GI-NEXT: adrp x8, .LCPI10_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
; Build the first mask: lanes 0-7 = %v, lanes 8-15 = -1.
%ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
%ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
%ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
%ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
%ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
%ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
%ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
%ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
%ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
%ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
%ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
%ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
; First lookup uses the runtime mask; second lookup uses a constant mask.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; CHECK-GI-LABEL: .LCPI11_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 15 // 0xf
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 31 // 0x1f
; CHECK-GI-LABEL: .LCPI11_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Second non-constant-first-mask variant. The first mask mixes constants and
; the runtime value: lanes 0-7 = 1, lanes 8-11 = -1, lanes 12-13 = %v,
; lane 14 = -1, lane 15 = %v. The shuffle is also irregular: it takes lanes
; 0-6 and 15 of %t1 followed by lanes 0-6 and 15 of %t2 (shuffle indices
; 16-22 and 31), so a lane holding %v (lane 15 of the first mask) ends up in
; result lane 7.
; Expected output:
;   * default selector (SD prefix): a single four-register tbl.16b whose mask
;     is assembled lane by lane, with lane 7 inserted from w0;
;   * -global-isel (GI prefix): no fold - lane moves build the mask and three
;     two-register tbl.16b instructions are emitted.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov w8, #1 // =0x1
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: fmov s4, w8
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: mov.b v4[1], w8
; CHECK-SD-NEXT: mov.b v4[2], w8
; CHECK-SD-NEXT: mov.b v4[3], w8
; CHECK-SD-NEXT: mov.b v4[4], w8
; CHECK-SD-NEXT: mov.b v4[5], w8
; CHECK-SD-NEXT: mov.b v4[6], w8
; CHECK-SD-NEXT: mov w8, #32 // =0x20
; CHECK-SD-NEXT: mov.b v4[7], w0
; CHECK-SD-NEXT: mov.b v4[8], w8
; CHECK-SD-NEXT: mov w8, #36 // =0x24
; CHECK-SD-NEXT: mov.b v4[9], w8
; CHECK-SD-NEXT: mov w8, #40 // =0x28
; CHECK-SD-NEXT: mov.b v4[10], w8
; CHECK-SD-NEXT: mov w8, #44 // =0x2c
; CHECK-SD-NEXT: mov.b v4[11], w8
; CHECK-SD-NEXT: mov w8, #48 // =0x30
; CHECK-SD-NEXT: mov.b v4[12], w8
; CHECK-SD-NEXT: mov w8, #52 // =0x34
; CHECK-SD-NEXT: mov.b v4[13], w8
; CHECK-SD-NEXT: mov w8, #56 // =0x38
; CHECK-SD-NEXT: mov.b v4[14], w8
; CHECK-SD-NEXT: mov w8, #31 // =0x1f
; CHECK-SD-NEXT: mov.b v4[15], w8
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: fmov s6, w0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: mov w8, #255 // =0xff
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
; CHECK-GI-NEXT: mov.b v5[3], v4[0]
; CHECK-GI-NEXT: mov.b v5[4], v4[0]
; CHECK-GI-NEXT: mov.b v5[5], v4[0]
; CHECK-GI-NEXT: mov.b v5[6], v4[0]
; CHECK-GI-NEXT: mov.b v5[7], v4[0]
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI11_1
; CHECK-GI-NEXT: mov.b v5[8], v4[0]
; CHECK-GI-NEXT: mov.b v5[9], v4[0]
; CHECK-GI-NEXT: mov.b v5[10], v4[0]
; CHECK-GI-NEXT: mov.b v5[11], v4[0]
; CHECK-GI-NEXT: mov.b v5[12], v6[0]
; CHECK-GI-NEXT: mov.b v5[13], v6[0]
; CHECK-GI-NEXT: mov.b v5[14], v4[0]
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI11_1]
; CHECK-GI-NEXT: adrp x8, .LCPI11_0
; CHECK-GI-NEXT: mov.b v5[15], v6[0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v5
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
; Build the first mask: constants in lanes 0-11 and 14, %v in lanes 12, 13, 15.
%ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
%ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
%ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
%ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
%ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
%ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
%ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
%ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
%ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
%ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
%ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
%ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
; First lookup uses the mixed mask; second lookup uses a constant mask.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
ret <16 x i8> %s
}
; CHECK-SD-LABEL: .LCPI12_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-GI-LABEL: .LCPI12_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI12_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Variant where the SECOND tbl2 carries the non-constant mask. %t1 looks up
; (%c, %d) with the constant mask <0, 4, ..., 28, -1 x 8>; %t2 looks up
; (%a, %b) with a mask built by insertelement (lanes 0-7 = %v, lanes
; 8-15 = -1). The shuffle concatenates lanes 0-7 of %t1 with lanes 0-7 of %t2.
; Expected output:
;   * default selector (SD prefix): no four-register tbl - two two-register
;     tbl.16b instructions whose results are merged with a mov.d lane copy;
;   * -global-isel (GI prefix): lane moves build the mask and three
;     two-register tbl.16b instructions are emitted.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff
; CHECK-SD-NEXT: adrp x8, .LCPI12_0
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0]
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT: mov.b v4[0], w0
; CHECK-SD-NEXT: mov.b v4[1], w0
; CHECK-SD-NEXT: mov.b v4[2], w0
; CHECK-SD-NEXT: mov.b v4[3], w0
; CHECK-SD-NEXT: mov.b v4[4], w0
; CHECK-SD-NEXT: mov.b v4[5], w0
; CHECK-SD-NEXT: mov.b v4[6], w0
; CHECK-SD-NEXT: mov.b v4[7], w0
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-SD-NEXT: mov.d v2[1], v0[0]
; CHECK-SD-NEXT: mov.16b v0, v2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
; CHECK-GI-NEXT: mov.b v5[3], v4[0]
; CHECK-GI-NEXT: mov.b v5[4], v4[0]
; CHECK-GI-NEXT: mov.b v5[5], v4[0]
; CHECK-GI-NEXT: mov.b v5[6], v4[0]
; CHECK-GI-NEXT: mov.b v5[7], v4[0]
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI12_1
; CHECK-GI-NEXT: mov.b v5[8], v4[0]
; CHECK-GI-NEXT: mov.b v5[9], v4[0]
; CHECK-GI-NEXT: mov.b v5[10], v4[0]
; CHECK-GI-NEXT: mov.b v5[11], v4[0]
; CHECK-GI-NEXT: mov.b v5[12], v4[0]
; CHECK-GI-NEXT: mov.b v5[13], v4[0]
; CHECK-GI-NEXT: mov.b v5[14], v4[0]
; CHECK-GI-NEXT: mov.b v5[15], v4[0]
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI12_1]
; CHECK-GI-NEXT: adrp x8, .LCPI12_0
; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v4
; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; CHECK-GI-NEXT: ret
; Build the runtime mask: lanes 0-7 = %v, lanes 8-15 = -1.
%ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
%ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
%ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
%ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
%ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
%ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
%ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
%ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
%ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
%ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
%ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
%ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
; Note the operand swap relative to the first-mask tests: the constant-mask
; lookup reads (%c, %d) and the runtime-mask lookup reads (%a, %b).
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; CHECK-SD-LABEL: .LCPI13_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-LABEL: .LCPI13_1:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 1 // 0x1
; CHECK-SD-NEXT: .byte 2 // 0x2
; CHECK-SD-NEXT: .byte 3 // 0x3
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 5 // 0x5
; CHECK-SD-NEXT: .byte 6 // 0x6
; CHECK-SD-NEXT: .byte 7 // 0x7
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 17 // 0x11
; CHECK-SD-NEXT: .byte 18 // 0x12
; CHECK-SD-NEXT: .byte 19 // 0x13
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 21 // 0x15
; CHECK-SD-NEXT: .byte 30 // 0x1e
; CHECK-SD-NEXT: .byte 31 // 0x1f
; CHECK-GI-LABEL: .LCPI13_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 30 // 0x1e
; CHECK-GI-NEXT: .byte 31 // 0x1f
; CHECK-GI-LABEL: .LCPI13_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2 lookups feed one shufflevector, but the index vector of the second
; lookup is assembled at run time: lanes 0-7 and 14-15 hold %v and lanes 8-13
; hold the constant -1. The expected assembly for both instruction selectors
; keeps the two 2-register tbl.16b lookups and adds a third tbl.16b over their
; results; no 4-register tbl.16b is expected.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: dup.16b v4, w0
; CHECK-SD-NEXT: mov w8, #255 // =0xff
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: mov.b v4[8], w8
; CHECK-SD-NEXT: mov.b v4[9], w8
; CHECK-SD-NEXT: mov.b v4[10], w8
; CHECK-SD-NEXT: mov.b v4[11], w8
; CHECK-SD-NEXT: mov.b v4[12], w8
; CHECK-SD-NEXT: mov.b v4[13], w8
; CHECK-SD-NEXT: adrp x8, .LCPI13_0
; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0]
; CHECK-SD-NEXT: adrp x8, .LCPI13_1
; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT: tbl.16b v3, { v0, v1 }, v4
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1]
; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov s4, w0
; CHECK-GI-NEXT: mov w8, #255 // =0xff
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: fmov s6, w8
; CHECK-GI-NEXT: adrp x8, .LCPI13_1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: mov.16b v5, v4
; CHECK-GI-NEXT: mov.b v5[1], v4[0]
; CHECK-GI-NEXT: mov.b v5[2], v4[0]
; CHECK-GI-NEXT: mov.b v5[3], v4[0]
; CHECK-GI-NEXT: mov.b v5[4], v4[0]
; CHECK-GI-NEXT: mov.b v5[5], v4[0]
; CHECK-GI-NEXT: mov.b v5[6], v4[0]
; CHECK-GI-NEXT: mov.b v5[7], v4[0]
; CHECK-GI-NEXT: mov.b v5[8], v6[0]
; CHECK-GI-NEXT: mov.b v5[9], v6[0]
; CHECK-GI-NEXT: mov.b v5[10], v6[0]
; CHECK-GI-NEXT: mov.b v5[11], v6[0]
; CHECK-GI-NEXT: mov.b v5[12], v6[0]
; CHECK-GI-NEXT: mov.b v5[13], v6[0]
; CHECK-GI-NEXT: mov.b v5[14], v4[0]
; CHECK-GI-NEXT: mov.b v5[15], v4[0]
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI13_1]
; CHECK-GI-NEXT: adrp x8, .LCPI13_0
; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v4
; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v5
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0
; CHECK-GI-NEXT: ret
; Run-time index vector for %t2: %v in lanes 0-7 and 14-15, -1 in lanes 8-13.
%ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
%ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
%ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
%ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
%ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
%ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
%ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
%ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
%ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
%ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
%ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
%ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
%ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
%ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
%ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
%ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
; %t1 looks up %c/%d with a constant index vector; %t2 looks up %a/%b with the
; run-time index vector built above.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
; Result lanes 0-7 are %t1 lanes 0-7; result lanes 8-15 are %t2 lanes 0-5, 14
; and 15.
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
ret <16 x i8> %s
}
; CHECK-SD-LABEL: .LCPI14_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 32 // 0x20
; CHECK-SD-NEXT: .byte 36 // 0x24
; CHECK-SD-NEXT: .byte 40 // 0x28
; CHECK-SD-NEXT: .byte 44 // 0x2c
; CHECK-SD-NEXT: .byte 48 // 0x30
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 56 // 0x38
; CHECK-SD-NEXT: .byte 60 // 0x3c
; CHECK-GI-LABEL: .LCPI14_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI14_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2 lookups that use the same constant index vector are combined by a
; shufflevector that does not simply concatenate halves: result lane 2 is taken
; from %t2 (element 21, i.e. %t2 lane 5) while the remaining low lanes come from
; %t1. SelectionDAG is expected to emit a single 4-register tbl.16b driven by
; one constant-pool mask; GlobalISel is expected to emit the two 2-register
; lookups followed by a third tbl.16b over their results.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI14_0
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
; Both lookups use the identical constant index vector.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0, 1 and 3-7 are the matching %t1 lanes, lane 2 is %t2 lane 5,
; and lanes 8-15 are %t2 lanes 0-7.
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; CHECK-SD-LABEL: .LCPI15_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 32 // 0x20
; CHECK-SD-NEXT: .byte 36 // 0x24
; CHECK-SD-NEXT: .byte 40 // 0x28
; CHECK-SD-NEXT: .byte 44 // 0x2c
; CHECK-SD-NEXT: .byte 48 // 0x30
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 56 // 0x38
; CHECK-SD-NEXT: .byte 60 // 0x3c
; CHECK-GI-LABEL: .LCPI15_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI15_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-LABEL: .LCPI15_2:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2 lookups combined by a shufflevector, where the index vector of the
; first lookup carries an in-range index (0) in lane 8 instead of -1. The
; shufflevector never selects lane 8 of %t1. SelectionDAG is expected to emit a
; single 4-register tbl.16b; GlobalISel is expected to load the two distinct
; index vectors, emit both 2-register lookups and finish with a third tbl.16b.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI15_0
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI15_2
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2]
; CHECK-GI-NEXT: adrp x8, .LCPI15_1
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
; The two index vectors differ only in lane 8: 0 for %t1, -1 for %t2.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0, 1 and 3-7 are the matching %t1 lanes, lane 2 is %t2 lane 5,
; and lanes 8-15 are %t2 lanes 0-7.
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; CHECK-SD-LABEL: .LCPI16_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 32 // 0x20
; CHECK-SD-NEXT: .byte 36 // 0x24
; CHECK-SD-NEXT: .byte 40 // 0x28
; CHECK-SD-NEXT: .byte 44 // 0x2c
; CHECK-SD-NEXT: .byte 48 // 0x30
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 56 // 0x38
; CHECK-SD-NEXT: .byte 60 // 0x3c
; CHECK-GI-LABEL: .LCPI16_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI16_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-LABEL: .LCPI16_2:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; Two tbl2 lookups combined by a shufflevector, where the index vector of the
; second lookup carries an in-range index (0) in lane 8 instead of -1. The
; shufflevector only selects lanes 0-7 of %t2, so that lane is never read.
; SelectionDAG is expected to emit a single 4-register tbl.16b; GlobalISel is
; expected to load the two distinct index vectors, emit both 2-register lookups
; and finish with a third tbl.16b.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI16_2
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2]
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT: ret
; The two index vectors differ only in lane 8: -1 for %t1, 0 for %t2.
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
; Result lanes 0, 1 and 3-7 are the matching %t1 lanes, lane 2 is %t2 lane 5,
; and lanes 8-15 are %t2 lanes 0-7.
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s
}
; Declarations of the AArch64 NEON table-lookup intrinsics tbl1 through tbl4.
; Each takes one to four <16 x i8> table operands followed by an index vector,
; and returns a vector of the same type as the index vector (<8 x i8> for the
; .v8i8 variants, <16 x i8> for the .v16i8 variants).
declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; One-register tbx with an 8-byte result. Both instruction selectors are
; expected to emit a single tbx.8b with v0 as destination, v1 as the table
; register and v2 as the index register.
define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbx.8b v0, { v1 }, v2
; CHECK-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
ret <8 x i8> %tmp3
}
; One-register tbx with a 16-byte result. Both instruction selectors are
; expected to emit a single tbx.16b with v0 as destination, v1 as the table
; register and v2 as the index register.
define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: tbx.16b v0, { v1 }, v2
; CHECK-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
ret <16 x i8> %tmp3
}
; Two-register tbx with an 8-byte result. Both instruction selectors are
; expected to emit the same tbx.8b over the { v1, v2 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-SD-LABEL: tbx2_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx2_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
; Two-register tbx with a 16-byte result. Both instruction selectors are
; expected to emit the same tbx.16b over the { v1, v2 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-SD-LABEL: tbx2_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx2_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
; Three-register tbx with an 8-byte result. Both instruction selectors are
; expected to emit the same tbx.8b over the { v1, v2, v3 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-SD-LABEL: tbx3_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx3_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
; Three-register tbx with a 16-byte result. Both instruction selectors are
; expected to emit the same tbx.16b over the { v1, v2, v3 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-SD-LABEL: tbx3_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx3_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
; Four-register tbx with an 8-byte result. Both instruction selectors are
; expected to emit the same tbx.8b over the { v1, v2, v3, v4 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
; CHECK-SD-LABEL: tbx4_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx4_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5
; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
ret <8 x i8> %tmp3
}
; Four-register tbx with a 16-byte result. Both instruction selectors are
; expected to emit the same tbx.16b over the { v1, v2, v3, v4 } table; the two
; expectation blocks differ only in the order of the register-tuple kill
; comments.
define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
; CHECK-SD-LABEL: tbx4_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: tbx4_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5
; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
ret <16 x i8> %tmp3
}
; Declarations of the AArch64 NEON tbx1 through tbx4 intrinsics. Each takes a
; leading operand of the result type, then one to four <16 x i8> table
; operands, then an index vector of the result type (<8 x i8> for the .v8i8
; variants, <16 x i8> for the .v16i8 variants).
declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone