llvm/llvm/test/CodeGen/AArch64/arm64-tbl.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Single-register table lookup producing a 64-bit result: calls the tbl1.v8i8
; intrinsic with a <16 x i8> table (%A) and an <8 x i8> index vector (%B).
; Both llc runs share one set of expectations: a single tbl.8b with a
; one-register table list, followed by ret.
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl.8b v0, { v0 }, v1
; CHECK-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
  ret <8 x i8> %tmp3
}

; Single-register table lookup producing a 128-bit result: calls the
; tbl1.v16i8 intrinsic with a <16 x i8> table (%A) and a <16 x i8> index
; vector (%B). Both llc runs share one set of expectations: a single tbl.16b
; with a one-register table list, followed by ret.
define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl.16b v0, { v0 }, v1
; CHECK-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
  ret <16 x i8> %tmp3
}

; Two-register table lookup producing a 64-bit result: calls the tbl2.v8i8
; intrinsic with tables %A and %B and an <8 x i8> index vector (%C).
; The default llc run and the -global-isel run have separate expectations,
; but they differ only in the order of the register "kill" annotations; both
; end in the same tbl.8b over the { v0, v1 } table list.
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
; CHECK-SD-LABEL: tbl2_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl2_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  ret <8 x i8> %tmp3
}

; Two-register table lookup producing a 128-bit result: calls the tbl2.v16i8
; intrinsic with tables %A and %B and a <16 x i8> index vector (%C).
; The default llc run and the -global-isel run have separate expectations,
; but they differ only in the order of the register "kill" annotations; both
; end in the same tbl.16b over the { v0, v1 } table list.
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-SD-LABEL: tbl2_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl2_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  ret <16 x i8> %tmp3
}

; Three-register table lookup producing a 64-bit result: calls the tbl3.v8i8
; intrinsic with tables %A, %B, %C and an <8 x i8> index vector (%D).
; The default llc run and the -global-isel run have separate expectations,
; but they differ only in the order of the register "kill" annotations; both
; end in the same tbl.8b over the { v0, v1, v2 } table list.
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-SD-LABEL: tbl3_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl3_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  ret <8 x i8> %tmp3
}

; Three-register table lookup producing a 128-bit result: calls the
; tbl3.v16i8 intrinsic with tables %A, %B, %C and a <16 x i8> index vector
; (%D). The default llc run and the -global-isel run have separate
; expectations, but they differ only in the order of the register "kill"
; annotations; both end in the same tbl.16b over the { v0, v1, v2 } list.
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-SD-LABEL: tbl3_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl3_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
  ret <16 x i8> %tmp3
}

; Four-register table lookup producing a 64-bit result: calls the tbl4.v8i8
; intrinsic with tables %A, %B, %C, %D and an <8 x i8> index vector (%E).
; The default llc run and the -global-isel run have separate expectations,
; but they differ only in the order of the register "kill" annotations; both
; end in the same tbl.8b over the { v0, v1, v2, v3 } table list.
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-SD-LABEL: tbl4_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl4_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
  ret <8 x i8> %tmp3
}

; Four-register table lookup producing a 128-bit result: calls the
; tbl4.v16i8 intrinsic with tables %A, %B, %C, %D and a <16 x i8> index
; vector (%E). The default llc run and the -global-isel run have separate
; expectations, but they differ only in the order of the register "kill"
; annotations; both end in the same tbl.16b over the { v0, v1, v2, v3 } list.
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-SD-LABEL: tbl4_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl4_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
  ret <16 x i8> %tmp3
}

; CHECK-SD-LABEL: .LCPI8_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   8                               // 0x8
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff

; CHECK-GI-LABEL: .LCPI8_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   2                               // 0x2
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   13                              // 0xd
; CHECK-GI-NEXT:         .byte   14                              // 0xe
; CHECK-GI-NEXT:         .byte   15                              // 0xf
; CHECK-GI-LABEL: .LCPI8_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; 64-bit variant of the "shuffle of two tbl2 results" pattern. Two tbl2.v8i8
; calls use the same constant mask <0, 4, 8, 12, -1, -1, -1, -1>; the
; shufflevector then takes lanes 0-3 of %t1 and lanes 4-7 of %t2.
; Despite the function name, neither expected output below uses a
; four-register table:
;  - the default llc run expects two two-register tbl.8b lookups sharing the
;    mask loaded from .LCPI8_0, merged with a 32-bit lane move (mov.s);
;  - the -global-isel run expects the same two lookups (mask from .LCPI8_1),
;    a mov.d to concatenate them, and a final one-register tbl.16b whose
;    mask is loaded from .LCPI8_0 to perform the shuffle.
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI8_0]
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v4
; CHECK-SD-NEXT:    tbl.8b v1, { v2, v3 }, v4
; CHECK-SD-NEXT:    mov.s v0[1], v1[1]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI8_1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr d4, [x8, :lo12:.LCPI8_1]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.8b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s
}

; CHECK-SD-LABEL: .LCPI9_0:
; CHECK-SD-NEXT:     .byte    0                               // 0x0
; CHECK-SD-NEXT:     .byte    4                               // 0x4
; CHECK-SD-NEXT:     .byte    8                               // 0x8
; CHECK-SD-NEXT:     .byte    12                              // 0xc
; CHECK-SD-NEXT:     .byte    16                              // 0x10
; CHECK-SD-NEXT:     .byte    20                              // 0x14
; CHECK-SD-NEXT:     .byte    24                              // 0x18
; CHECK-SD-NEXT:     .byte    28                              // 0x1c
; CHECK-SD-NEXT:     .byte   32                              // 0x20
; CHECK-SD-NEXT:     .byte   36                              // 0x24
; CHECK-SD-NEXT:     .byte   40                              // 0x28
; CHECK-SD-NEXT:     .byte   44                              // 0x2c
; CHECK-SD-NEXT:     .byte   48                              // 0x30
; CHECK-SD-NEXT:     .byte   52                              // 0x34
; CHECK-SD-NEXT:     .byte   56                              // 0x38
; CHECK-SD-NEXT:     .byte   60                              // 0x3c

;CHECK-GI-LABEL: .LCPI9_0:
;CHECK-GI:              .byte   0                               // 0x0
;CHECK-GI-NEXT:         .byte   1                               // 0x1
;CHECK-GI-NEXT:         .byte   2                               // 0x2
;CHECK-GI-NEXT:         .byte   3                               // 0x3
;CHECK-GI-NEXT:         .byte   4                               // 0x4
;CHECK-GI-NEXT:         .byte   5                               // 0x5
;CHECK-GI-NEXT:         .byte   6                               // 0x6
;CHECK-GI-NEXT:         .byte   7                               // 0x7
;CHECK-GI-NEXT:         .byte   16                              // 0x10
;CHECK-GI-NEXT:         .byte   17                              // 0x11
;CHECK-GI-NEXT:         .byte   18                              // 0x12
;CHECK-GI-NEXT:         .byte   19                              // 0x13
;CHECK-GI-NEXT:         .byte   20                              // 0x14
;CHECK-GI-NEXT:         .byte   21                              // 0x15
;CHECK-GI-NEXT:         .byte   22                              // 0x16
;CHECK-GI-NEXT:         .byte   23                              // 0x17
;CHECK-GI-LABEL: .LCPI9_1:
;CHECK-GI:              .byte   0                               // 0x0
;CHECK-GI-NEXT:         .byte   4                               // 0x4
;CHECK-GI-NEXT:         .byte   8                               // 0x8
;CHECK-GI-NEXT:         .byte   12                              // 0xc
;CHECK-GI-NEXT:         .byte   16                              // 0x10
;CHECK-GI-NEXT:         .byte   20                              // 0x14
;CHECK-GI-NEXT:         .byte   24                              // 0x18
;CHECK-GI-NEXT:         .byte   28                              // 0x1c
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff
;CHECK-GI-NEXT:         .byte   255                             // 0xff

; Shuffle of two tbl2.v16i8 results with constant masks. Both calls use the
; mask <0, 4, ..., 28, -1 x 8>; the shufflevector takes lanes 0-7 of %t1 and
; lanes 0-7 of %t2.
;  - The default llc run expects the whole sequence to become one
;    four-register tbl.16b over { v0, v1, v2, v3 } with its mask loaded from
;    .LCPI9_0 (bytes 0, 4, ..., 60 per the constant-pool checks above).
;  - The -global-isel run expects no such merge: two two-register tbl.16b
;    lookups (mask from .LCPI9_1) followed by a third two-register tbl.16b
;    (mask from .LCPI9_0) that implements the shuffle.
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI9_0
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI9_1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI9_1]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-GI-LABEL: .LCPI10_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   2                               // 0x2
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   23                              // 0x17
; CHECK-GI-LABEL: .LCPI10_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; Same shuffle-of-two-tbl2 pattern, but the FIRST lookup's mask is not a
; constant: it is built with insertelement so that lanes 0-7 hold the runtime
; value %v and lanes 8-15 hold -1. The second lookup keeps the constant mask
; <0, 4, ..., 28, -1 x 8>. The shufflevector takes lanes 0-7 of %t1 and
; lanes 0-7 of %t2.
;  - The default llc run still expects a single four-register tbl.16b. The
;    combined mask is assembled lane by lane in v4: w0 (%v) for lanes 0-7 and
;    the immediates 32, 36, ..., 60 for lanes 8-15.
;  - The -global-isel run expects the first mask to be materialized in v5
;    (lanes 0-7 from %v, lanes 8-15 from 255), two two-register tbl.16b
;    lookups, and a third tbl.16b (mask from .LCPI10_0) for the shuffle.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    fmov s4, w0
; CHECK-SD-NEXT:    mov w8, #32 // =0x20
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[1], w0
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[2], w0
; CHECK-SD-NEXT:    mov.b v4[3], w0
; CHECK-SD-NEXT:    mov.b v4[4], w0
; CHECK-SD-NEXT:    mov.b v4[5], w0
; CHECK-SD-NEXT:    mov.b v4[6], w0
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    mov.b v4[8], w8
; CHECK-SD-NEXT:    mov w8, #36 // =0x24
; CHECK-SD-NEXT:    mov.b v4[9], w8
; CHECK-SD-NEXT:    mov w8, #40 // =0x28
; CHECK-SD-NEXT:    mov.b v4[10], w8
; CHECK-SD-NEXT:    mov w8, #44 // =0x2c
; CHECK-SD-NEXT:    mov.b v4[11], w8
; CHECK-SD-NEXT:    mov w8, #48 // =0x30
; CHECK-SD-NEXT:    mov.b v4[12], w8
; CHECK-SD-NEXT:    mov w8, #52 // =0x34
; CHECK-SD-NEXT:    mov.b v4[13], w8
; CHECK-SD-NEXT:    mov w8, #56 // =0x38
; CHECK-SD-NEXT:    mov.b v4[14], w8
; CHECK-SD-NEXT:    mov w8, #60 // =0x3c
; CHECK-SD-NEXT:    mov.b v4[15], w8
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s4, w0
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    mov.16b v5, v4
; CHECK-GI-NEXT:    mov.b v5[1], v4[0]
; CHECK-GI-NEXT:    mov.b v5[2], v4[0]
; CHECK-GI-NEXT:    mov.b v5[3], v4[0]
; CHECK-GI-NEXT:    mov.b v5[4], v4[0]
; CHECK-GI-NEXT:    mov.b v5[5], v4[0]
; CHECK-GI-NEXT:    mov.b v5[6], v4[0]
; CHECK-GI-NEXT:    mov.b v5[7], v4[0]
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
; CHECK-GI-NEXT:    mov.b v5[8], v4[0]
; CHECK-GI-NEXT:    mov.b v5[9], v4[0]
; CHECK-GI-NEXT:    mov.b v5[10], v4[0]
; CHECK-GI-NEXT:    mov.b v5[11], v4[0]
; CHECK-GI-NEXT:    mov.b v5[12], v4[0]
; CHECK-GI-NEXT:    mov.b v5[13], v4[0]
; CHECK-GI-NEXT:    mov.b v5[14], v4[0]
; CHECK-GI-NEXT:    mov.b v5[15], v4[0]
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI10_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v5
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-GI-LABEL: .LCPI11_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   2                               // 0x2
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   15                              // 0xf
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   31                              // 0x1f
; CHECK-GI-LABEL: .LCPI11_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; Variant of the non-constant-first-mask case with a mixed mask and a
; shuffle that reaches into the upper lanes. The first lookup's mask is built
; with insertelement: lanes 0-7 = 1, lanes 8-11 = -1, lanes 12-13 = %v,
; lane 14 = -1, lane 15 = %v. The second lookup keeps the constant mask
; <0, 4, ..., 28, -1 x 8>. The shufflevector takes lanes 0-6 and 15 of %t1
; followed by lanes 0-6 and 15 of %t2 (indices 16-22 and 31).
;  - The default llc run expects a single four-register tbl.16b whose mask is
;    assembled in v4: 1 for lanes 0-6, w0 (%v) for lane 7, the immediates
;    32, 36, ..., 56 for lanes 8-14, and 31 for lane 15.
;  - The -global-isel run expects the first mask to be materialized in v5,
;    two two-register tbl.16b lookups, and a third tbl.16b (mask from
;    .LCPI11_0) for the shuffle.
; NOTE(review): result lane 15 comes from lane 15 of %t2, whose mask byte is
; -1, yet the default run expects 31 in lane 15 of the combined mask (as if
; computed as -1 + 32). Presumably an out-of-range index in the original
; two-register lookup yields zero, whereas 31 is an in-range index for a
; four-register table -- confirm that this expected output is intended.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #1 // =0x1
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    fmov s4, w8
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[1], w8
; CHECK-SD-NEXT:    mov.b v4[2], w8
; CHECK-SD-NEXT:    mov.b v4[3], w8
; CHECK-SD-NEXT:    mov.b v4[4], w8
; CHECK-SD-NEXT:    mov.b v4[5], w8
; CHECK-SD-NEXT:    mov.b v4[6], w8
; CHECK-SD-NEXT:    mov w8, #32 // =0x20
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    mov.b v4[8], w8
; CHECK-SD-NEXT:    mov w8, #36 // =0x24
; CHECK-SD-NEXT:    mov.b v4[9], w8
; CHECK-SD-NEXT:    mov w8, #40 // =0x28
; CHECK-SD-NEXT:    mov.b v4[10], w8
; CHECK-SD-NEXT:    mov w8, #44 // =0x2c
; CHECK-SD-NEXT:    mov.b v4[11], w8
; CHECK-SD-NEXT:    mov w8, #48 // =0x30
; CHECK-SD-NEXT:    mov.b v4[12], w8
; CHECK-SD-NEXT:    mov w8, #52 // =0x34
; CHECK-SD-NEXT:    mov.b v4[13], w8
; CHECK-SD-NEXT:    mov w8, #56 // =0x38
; CHECK-SD-NEXT:    mov.b v4[14], w8
; CHECK-SD-NEXT:    mov w8, #31 // =0x1f
; CHECK-SD-NEXT:    mov.b v4[15], w8
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    fmov s6, w0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    mov.16b v5, v4
; CHECK-GI-NEXT:    mov.b v5[1], v4[0]
; CHECK-GI-NEXT:    mov.b v5[2], v4[0]
; CHECK-GI-NEXT:    mov.b v5[3], v4[0]
; CHECK-GI-NEXT:    mov.b v5[4], v4[0]
; CHECK-GI-NEXT:    mov.b v5[5], v4[0]
; CHECK-GI-NEXT:    mov.b v5[6], v4[0]
; CHECK-GI-NEXT:    mov.b v5[7], v4[0]
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI11_1
; CHECK-GI-NEXT:    mov.b v5[8], v4[0]
; CHECK-GI-NEXT:    mov.b v5[9], v4[0]
; CHECK-GI-NEXT:    mov.b v5[10], v4[0]
; CHECK-GI-NEXT:    mov.b v5[11], v4[0]
; CHECK-GI-NEXT:    mov.b v5[12], v6[0]
; CHECK-GI-NEXT:    mov.b v5[13], v6[0]
; CHECK-GI-NEXT:    mov.b v5[14], v4[0]
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI11_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
; CHECK-GI-NEXT:    mov.b v5[15], v6[0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v5
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI12_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   8                               // 0x8
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   24                              // 0x18
; CHECK-SD-NEXT:         .byte   28                              // 0x1c
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff

; CHECK-GI-LABEL: .LCPI12_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   2                               // 0x2
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   23                              // 0x17
; CHECK-GI-LABEL: .LCPI12_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; Two tbl2 lookups are combined by a shufflevector that takes bytes 0-7 of %t1
; followed by bytes 0-7 of %t2 (shuffle indices 16-23). %t1 uses a constant
; index vector, but the index vector of %t2 is built at run time from %v, so
; its contents are not known at compile time. The expected output for both
; instruction selectors (the SD and GI assertion groups below) uses only
; two-register tbl instructions.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi.2d v4, #0xffffffffffffffff
; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI12_0]
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT:    mov.b v4[0], w0
; CHECK-SD-NEXT:    mov.b v4[1], w0
; CHECK-SD-NEXT:    mov.b v4[2], w0
; CHECK-SD-NEXT:    mov.b v4[3], w0
; CHECK-SD-NEXT:    mov.b v4[4], w0
; CHECK-SD-NEXT:    mov.b v4[5], w0
; CHECK-SD-NEXT:    mov.b v4[6], w0
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-SD-NEXT:    mov.d v2[1], v0[0]
; CHECK-SD-NEXT:    mov.16b v0, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s4, w0
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    mov.16b v5, v4
; CHECK-GI-NEXT:    mov.b v5[1], v4[0]
; CHECK-GI-NEXT:    mov.b v5[2], v4[0]
; CHECK-GI-NEXT:    mov.b v5[3], v4[0]
; CHECK-GI-NEXT:    mov.b v5[4], v4[0]
; CHECK-GI-NEXT:    mov.b v5[5], v4[0]
; CHECK-GI-NEXT:    mov.b v5[6], v4[0]
; CHECK-GI-NEXT:    mov.b v5[7], v4[0]
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
; CHECK-GI-NEXT:    mov.b v5[8], v4[0]
; CHECK-GI-NEXT:    mov.b v5[9], v4[0]
; CHECK-GI-NEXT:    mov.b v5[10], v4[0]
; CHECK-GI-NEXT:    mov.b v5[11], v4[0]
; CHECK-GI-NEXT:    mov.b v5[12], v4[0]
; CHECK-GI-NEXT:    mov.b v5[13], v4[0]
; CHECK-GI-NEXT:    mov.b v5[14], v4[0]
; CHECK-GI-NEXT:    mov.b v5[15], v4[0]
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI12_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
; CHECK-GI-NEXT:    tbl.16b v2, { v2, v3 }, v4
; CHECK-GI-NEXT:    tbl.16b v3, { v0, v1 }, v5
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v2, v3 }, v0
; CHECK-GI-NEXT:    ret
  ; Build the run-time index vector: lanes 0-7 hold %v, lanes 8-15 hold -1.
  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
  ; %t1 indexes the table {%c, %d} with a constant index vector; %t2 indexes the
  ; table {%a, %b} with the run-time vector built above.
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  ; Result bytes 0-7 come from %t1 bytes 0-7, result bytes 8-15 from %t2 bytes 0-7.
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI13_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   8                               // 0x8
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   24                              // 0x18
; CHECK-SD-NEXT:         .byte   28                              // 0x1c
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-NEXT:         .byte   255                             // 0xff
; CHECK-SD-LABEL: .LCPI13_1:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   1                               // 0x1
; CHECK-SD-NEXT:         .byte   2                               // 0x2
; CHECK-SD-NEXT:         .byte   3                               // 0x3
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   5                               // 0x5
; CHECK-SD-NEXT:         .byte   6                               // 0x6
; CHECK-SD-NEXT:         .byte   7                               // 0x7
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   17                              // 0x11
; CHECK-SD-NEXT:         .byte   18                              // 0x12
; CHECK-SD-NEXT:         .byte   19                              // 0x13
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   21                              // 0x15
; CHECK-SD-NEXT:         .byte   30                              // 0x1e
; CHECK-SD-NEXT:         .byte   31                              // 0x1f

; CHECK-GI-LABEL: .LCPI13_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   2                               // 0x2
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   30                              // 0x1e
; CHECK-GI-NEXT:         .byte   31                              // 0x1f
; CHECK-GI-LABEL: .LCPI13_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; Variant of the non-constant second-mask case: the run-time index vector of
; %t2 holds %v in lanes 0-7 and 14-15 and -1 in lanes 8-13, and the shuffle
; reads %t2 bytes 0-5 and 14-15 (shuffle indices 16-21, 30, 31) after bytes 0-7
; of %t1. The expected output for both instruction selectors (the SD and GI
; assertion groups below) is three two-register tbl instructions.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    dup.16b v4, w0
; CHECK-SD-NEXT:    mov w8, #255 // =0xff
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    mov.b v4[8], w8
; CHECK-SD-NEXT:    mov.b v4[9], w8
; CHECK-SD-NEXT:    mov.b v4[10], w8
; CHECK-SD-NEXT:    mov.b v4[11], w8
; CHECK-SD-NEXT:    mov.b v4[12], w8
; CHECK-SD-NEXT:    mov.b v4[13], w8
; CHECK-SD-NEXT:    adrp x8, .LCPI13_0
; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI13_0]
; CHECK-SD-NEXT:    adrp x8, .LCPI13_1
; CHECK-SD-NEXT:    tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT:    tbl.16b v3, { v0, v1 }, v4
; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI13_1]
; CHECK-SD-NEXT:    tbl.16b v0, { v2, v3 }, v0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s4, w0
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    fmov s6, w8
; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    mov.16b v5, v4
; CHECK-GI-NEXT:    mov.b v5[1], v4[0]
; CHECK-GI-NEXT:    mov.b v5[2], v4[0]
; CHECK-GI-NEXT:    mov.b v5[3], v4[0]
; CHECK-GI-NEXT:    mov.b v5[4], v4[0]
; CHECK-GI-NEXT:    mov.b v5[5], v4[0]
; CHECK-GI-NEXT:    mov.b v5[6], v4[0]
; CHECK-GI-NEXT:    mov.b v5[7], v4[0]
; CHECK-GI-NEXT:    mov.b v5[8], v6[0]
; CHECK-GI-NEXT:    mov.b v5[9], v6[0]
; CHECK-GI-NEXT:    mov.b v5[10], v6[0]
; CHECK-GI-NEXT:    mov.b v5[11], v6[0]
; CHECK-GI-NEXT:    mov.b v5[12], v6[0]
; CHECK-GI-NEXT:    mov.b v5[13], v6[0]
; CHECK-GI-NEXT:    mov.b v5[14], v4[0]
; CHECK-GI-NEXT:    mov.b v5[15], v4[0]
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI13_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
; CHECK-GI-NEXT:    tbl.16b v2, { v2, v3 }, v4
; CHECK-GI-NEXT:    tbl.16b v3, { v0, v1 }, v5
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v2, v3 }, v0
; CHECK-GI-NEXT:    ret
  ; Build the run-time index vector: lanes 0-7 and 14-15 hold %v, lanes 8-13
  ; hold -1.
  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
  ; %t1 indexes the table {%c, %d} with a constant index vector; %t2 indexes the
  ; table {%a, %b} with the run-time vector built above.
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  ; Result bytes 0-7 come from %t1 bytes 0-7; result bytes 8-13 from %t2 bytes
  ; 0-5; result bytes 14-15 from %t2 bytes 14-15.
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI14_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   24                              // 0x18
; CHECK-SD-NEXT:         .byte   28                              // 0x1c
; CHECK-SD-NEXT:         .byte   32                              // 0x20
; CHECK-SD-NEXT:         .byte   36                              // 0x24
; CHECK-SD-NEXT:         .byte   40                              // 0x28
; CHECK-SD-NEXT:         .byte   44                              // 0x2c
; CHECK-SD-NEXT:         .byte   48                              // 0x30
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   56                              // 0x38
; CHECK-SD-NEXT:         .byte   60                              // 0x3c

; CHECK-GI-LABEL: .LCPI14_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   23                              // 0x17
; CHECK-GI-LABEL: .LCPI14_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; Both tbl2 calls use the same constant index vector, but the shuffle is not a
; plain concatenation of low halves: result byte 2 is taken from %t2 byte 5
; (shuffle index 21) instead of from %t1. The SD assertions expect a single
; four-register tbl driven by one combined constant; the GI assertions expect
; three two-register tbl instructions.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  ; %t1 indexes the table {%a, %b} and %t2 the table {%c, %d}, both with the
  ; same constant index vector.
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ; Shuffle index 21 in position 2 selects %t2 byte 5; the remaining low-half
  ; positions read %t1 and the high half reads %t2 bytes 0-7.
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI15_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   24                              // 0x18
; CHECK-SD-NEXT:         .byte   28                              // 0x1c
; CHECK-SD-NEXT:         .byte   32                              // 0x20
; CHECK-SD-NEXT:         .byte   36                              // 0x24
; CHECK-SD-NEXT:         .byte   40                              // 0x28
; CHECK-SD-NEXT:         .byte   44                              // 0x2c
; CHECK-SD-NEXT:         .byte   48                              // 0x30
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   56                              // 0x38
; CHECK-SD-NEXT:         .byte   60                              // 0x3c

; CHECK-GI-LABEL: .LCPI15_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   23                              // 0x17
; CHECK-GI-LABEL: .LCPI15_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-LABEL: .LCPI15_2:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; The two tbl2 calls use different constant index vectors: byte 8 of the first
; one (%t1) is 0 rather than -1. The shuffle never reads byte 8 of %t1, and it
; takes result byte 2 from %t2 byte 5 (shuffle index 21). The SD assertions
; expect a single four-register tbl; the GI assertions expect three
; two-register tbl instructions, each with its own constant.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI15_2
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI15_2]
; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  ; The index vector of %t1 has 0 in byte 8; the index vector of %t2 has -1 there.
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ; Only bytes 0, 1 and 3-7 of %t1 are read; position 2 and the high half read %t2.
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI16_0:
; CHECK-SD:              .byte   0                               // 0x0
; CHECK-SD-NEXT:         .byte   4                               // 0x4
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   12                              // 0xc
; CHECK-SD-NEXT:         .byte   16                              // 0x10
; CHECK-SD-NEXT:         .byte   20                              // 0x14
; CHECK-SD-NEXT:         .byte   24                              // 0x18
; CHECK-SD-NEXT:         .byte   28                              // 0x1c
; CHECK-SD-NEXT:         .byte   32                              // 0x20
; CHECK-SD-NEXT:         .byte   36                              // 0x24
; CHECK-SD-NEXT:         .byte   40                              // 0x28
; CHECK-SD-NEXT:         .byte   44                              // 0x2c
; CHECK-SD-NEXT:         .byte   48                              // 0x30
; CHECK-SD-NEXT:         .byte   52                              // 0x34
; CHECK-SD-NEXT:         .byte   56                              // 0x38
; CHECK-SD-NEXT:         .byte   60                              // 0x3c

; CHECK-GI-LABEL: .LCPI16_0:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   1                               // 0x1
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   3                               // 0x3
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   5                               // 0x5
; CHECK-GI-NEXT:         .byte   6                               // 0x6
; CHECK-GI-NEXT:         .byte   7                               // 0x7
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   17                              // 0x11
; CHECK-GI-NEXT:         .byte   18                              // 0x12
; CHECK-GI-NEXT:         .byte   19                              // 0x13
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   21                              // 0x15
; CHECK-GI-NEXT:         .byte   22                              // 0x16
; CHECK-GI-NEXT:         .byte   23                              // 0x17
; CHECK-GI-LABEL: .LCPI16_1:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-LABEL: .LCPI16_2:
; CHECK-GI:              .byte   0                               // 0x0
; CHECK-GI-NEXT:         .byte   4                               // 0x4
; CHECK-GI-NEXT:         .byte   8                               // 0x8
; CHECK-GI-NEXT:         .byte   12                              // 0xc
; CHECK-GI-NEXT:         .byte   16                              // 0x10
; CHECK-GI-NEXT:         .byte   20                              // 0x14
; CHECK-GI-NEXT:         .byte   24                              // 0x18
; CHECK-GI-NEXT:         .byte   28                              // 0x1c
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff
; CHECK-GI-NEXT:         .byte   255                             // 0xff

; The two tbl2 calls use different constant index vectors: byte 8 of the second
; one (%t2) is 0 rather than -1. The shuffle reads only bytes 0-7 of %t2
; (shuffle indices 16-23 and 21), so byte 8 of %t2 is never used. The SD
; assertions expect a single four-register tbl; the GI assertions expect three
; two-register tbl instructions, each with its own constant.
define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI16_2
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI16_2]
; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  ; The index vector of %t1 has -1 in byte 8; the index vector of %t2 has 0 there.
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  ; Position 2 reads %t2 byte 5 and the high half reads %t2 bytes 0-7; the
  ; remaining low-half positions read %t1.
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; Declarations of the NEON table-lookup intrinsics tbl1 through tbl4 for the
; tests in this file. The digit in the name matches the number of leading
; <16 x i8> table operands; the final operand is the index vector, whose type
; (<8 x i8> or <16 x i8>) is also the result type.
declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

; tbx1, 8-byte form: %A is the first intrinsic operand, %B is the single
; 16-byte table and %C is the index vector. Both instruction selectors share
; the same expectations: one tbx.8b with a one-register table, then the return.
define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbx.8b v0, { v1 }, v2
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  ret <8 x i8> %res
}

; tbx1, 16-byte form: same operand layout as the 8-byte variant but with a
; <16 x i8> first operand, index vector and result. Both instruction selectors
; share the same expectations: one tbx.16b with a one-register table.
define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: tbx1_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbx.16b v0, { v1 }, v2
; CHECK-NEXT:    ret
  %res = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  ret <16 x i8> %res
}

; tbx2, 8-byte form: the two 16-byte tables %B and %C must end up in the
; consecutive register pair { v1, v2 }, with %D as the index vector in v3.
; The expected output of the two instruction selectors differs only in the
; order of the register-pair kill annotations.
define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-SD-LABEL: tbx2_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx2_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %res = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  ret <8 x i8> %res
}

; tbx2, 16-byte form: tables %B and %C form the register pair { v1, v2 } and
; %D is the index vector in v3. The expected output of the two instruction
; selectors differs only in the order of the register-pair kill annotations.
define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-SD-LABEL: tbx2_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx2_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %res = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
  ret <16 x i8> %res
}

; tbx3, 8-byte form: the three 16-byte tables %B, %C and %D must end up in the
; consecutive register triple { v1, v2, v3 }, with %E as the index vector in
; v4. The expected output of the two instruction selectors differs only in the
; order of the register-triple kill annotations.
define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-SD-LABEL: tbx3_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx3_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
  ret <8 x i8> %tmp3
}

; tbx3, 16-byte form: tables %B, %C and %D form the register triple
; { v1, v2, v3 } and %E is the index vector in v4. The expected output of the
; two instruction selectors differs only in the order of the register-triple
; kill annotations.
define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-SD-LABEL: tbx3_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx3_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %res = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
  ret <16 x i8> %res
}

; tbx4, 8-byte form: the four 16-byte tables %B, %C, %D and %E must end up in
; the consecutive register quad { v1, v2, v3, v4 }, with %F as the index vector
; in v5. The expected output of the two instruction selectors differs only in
; the order of the register-quad kill annotations.
define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
; CHECK-SD-LABEL: tbx4_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx4_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
; CHECK-GI-NEXT:    ret
  %res = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
  ret <8 x i8> %res
}

; tbx4, 16-byte form: tables %B, %C, %D and %E form the register quad
; { v1, v2, v3, v4 } and %F is the index vector in v5. The expected output of
; the two instruction selectors differs only in the order of the register-quad
; kill annotations.
define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
; CHECK-SD-LABEL: tbx4_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbx4_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
; CHECK-GI-NEXT:    ret
  %res = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
  ret <16 x i8> %res
}

; Declarations of the NEON tbx1 through tbx4 intrinsics exercised by the tbx
; tests in this file. Compared with the tbl declarations, each takes one extra
; leading operand of the result type; it is followed by as many <16 x i8> table
; operands as the digit in the name, and finally the index vector, whose type
; (<8 x i8> or <16 x i8>) matches the result.
declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone