llvm/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

; standard vector concatenations

define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i32> @concat_zext_v8i16_v16i32(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = zext <8 x i16> %a0 to <8 x i32>
  %x1 = zext <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}

define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i32> @concat_zext_nneg_v8i16_v16i32(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = zext nneg <8 x i16> %a0 to <8 x i32>
  %x1 = zext nneg <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}

define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(
; SSE-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; SSE-NEXT:    [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32>
; SSE-NEXT:    [[X1:%.*]] = zext nneg <8 x i16> [[A1]] to <8 x i32>
; SSE-NEXT:    [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT:    ret <16 x i32> [[R]]
;
; AVX-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(
; AVX-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT:    [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
; AVX-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = sext <8 x i16> %a0 to <8 x i32>
  %x1 = zext nneg <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}

define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i32> @concat_sext_v8i16_v16i32(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = sext <8 x i16> %a0 to <8 x i32>
  %x1 = sext <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}

define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) {
; CHECK-LABEL: define <8 x i32> @concat_sext_v4i1_v8i32(
; CHECK-SAME: <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[A0]], <4 x i1> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
; CHECK-NEXT:    ret <8 x i32> [[R]]
;
  %x0 = sext <4 x i1> %a0 to <4 x i32>
  %x1 = sext <4 x i1> %a1 to <4 x i32>
  %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %r
}

define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[R]]
;
  %x0 = trunc <4 x i32> %a0 to <4 x i16>
  %x1 = trunc <4 x i32> %a1 to <4 x i16>
  %r = shufflevector <4 x i16> %x0, <4 x i16> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %r
}

define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr>
; CHECK-NEXT:    ret <8 x ptr> [[R]]
;
  %x0 = inttoptr <4 x i32> %a0 to <4 x ptr>
  %x1 = inttoptr <4 x i32> %a1 to <4 x ptr>
  %r = shufflevector <4 x ptr> %x0, <4 x ptr> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x ptr> %r
}

define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) {
; CHECK-LABEL: define <16 x i64> @concat_ptrtoint_v8i16_v16i32(
; CHECK-SAME: <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0]], <8 x ptr> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64>
; CHECK-NEXT:    ret <16 x i64> [[R]]
;
  %x0 = ptrtoint <8 x ptr> %a0 to <8 x i64>
  %x1 = ptrtoint <8 x ptr> %a1 to <8 x i64>
  %r = shufflevector <8 x i64> %x0, <8 x i64> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i64> %r
}

define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64(
; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT:    [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
; SSE-NEXT:    ret <8 x double> [[R]]
;
; AVX-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64(
; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
; AVX-NEXT:    [[X0:%.*]] = fpext <4 x float> [[A0]] to <4 x double>
; AVX-NEXT:    [[X1:%.*]] = fpext <4 x float> [[A1]] to <4 x double>
; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX-NEXT:    ret <8 x double> [[R]]
;
  %x0 = fpext <4 x float> %a0 to <4 x double>
  %x1 = fpext <4 x float> %a1 to <4 x double>
  %r = shufflevector <4 x double> %x0, <4 x double> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %r
}

define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: define <16 x float> @concat_fptrunc_v8f64_v16f32(
; CHECK-SAME: <8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = fptrunc <8 x double> [[A0]] to <8 x float>
; CHECK-NEXT:    [[X1:%.*]] = fptrunc <8 x double> [[A1]] to <8 x float>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x float> [[X0]], <8 x float> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x float> [[R]]
;
  %x0 = fptrunc <8 x double> %a0 to <8 x float>
  %x1 = fptrunc <8 x double> %a1 to <8 x float>
  %r = shufflevector <8 x float> %x0, <8 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %r
}

; commuted vector concatenation

define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = sext <8 x i16> %a0 to <8 x i32>
  %x1 = sext <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i32> %r
}

; interleaved shuffle

define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: define <8 x double> @interleave_fpext_v4f32_v8f64(
; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT:    [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double>
; CHECK-NEXT:    ret <8 x double> [[R]]
;
  %x0 = fpext <4 x float> %a0 to <4 x double>
  %x1 = fpext <4 x float> %a1 to <4 x double>
  %r = shufflevector <4 x double> %x0, <4 x double> %x1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x double> %r
}

; bitcasts (same element count)

define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: define <8 x float> @concat_bitcast_v4i32_v8f32(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %x0 = bitcast <4 x i32> %a0 to <4 x float>
  %x1 = bitcast <4 x i32> %a1 to <4 x float>
  %r = shufflevector <4 x float> %x0, <4 x float> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %r
}

; bitcasts (lower element count)

define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <4 x double> @concat_bitcast_v8i16_v4f64(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[R:%.*]] = bitcast <16 x i16> [[TMP1]] to <4 x double>
; CHECK-NEXT:    ret <4 x double> [[R]]
;
  %x0 = bitcast <8 x i16> %a0 to <2 x double>
  %x1 = bitcast <8 x i16> %a1 to <2 x double>
  %r = shufflevector <2 x double> %x0, <2 x double> %x1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %r
}

; bitcasts (higher element count)

define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: define <16 x i16> @concat_bitcast_v4i32_v16i16(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x i16>
; CHECK-NEXT:    ret <16 x i16> [[R]]
;
  %x0 = bitcast <4 x i32> %a0 to <8 x i16>
  %x1 = bitcast <4 x i32> %a1 to <8 x i16>
  %r = shufflevector <8 x i16> %x0, <8 x i16> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %r
}

; multiuse - ensure cost of any duplicated casts are worth it

define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) {
; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = trunc <4 x i32> [[A0]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    store <4 x i16> [[X0]], ptr [[A2]], align 8
; CHECK-NEXT:    ret <8 x i16> [[R]]
;
  %x0 = trunc <4 x i32> %a0 to <4 x i16>
  %x1 = trunc <4 x i32> %a1 to <4 x i16>
  %r = shufflevector <4 x i16> %x0, <4 x i16> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <4 x i16> %x0, ptr %a2
  ret <8 x i16> %r
}

; negative - multiuse - ensure cost of any duplicated casts are worth it

define <16 x i8> @concat_trunc_v8i64_v16i8_multiuse(<8 x i64> %a0, <8 x i64> %a1, ptr %a2) {
; CHECK-LABEL: define <16 x i8> @concat_trunc_v8i64_v16i8_multiuse(
; CHECK-SAME: <8 x i64> [[A0:%.*]], <8 x i64> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = trunc <8 x i64> [[A0]] to <8 x i8>
; CHECK-NEXT:    [[X1:%.*]] = trunc <8 x i64> [[A1]] to <8 x i8>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i8> [[X0]], <8 x i8> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 4, i32 15>
; CHECK-NEXT:    store <8 x i8> [[X0]], ptr [[A2]], align 8
; CHECK-NEXT:    ret <16 x i8> [[R]]
;
  %x0 = trunc <8 x i64> %a0 to <8 x i8>
  %x1 = trunc <8 x i64> %a1 to <8 x i8>
  %r = shufflevector <8 x i8> %x0, <8 x i8> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 4, i32 15>
  store <8 x i8> %x0, ptr %a2
  ret <16 x i8> %r
}

; negative - bitcasts (unscalable higher element count)

define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: define <16 x i16> @revpair_bitcast_v4i32_v16i16(
; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = bitcast <4 x i32> [[A0]] to <8 x i16>
; CHECK-NEXT:    [[X1:%.*]] = bitcast <4 x i32> [[A1]] to <8 x i16>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> <i32 1, i32 0, i32 3, i32 3, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT:    ret <16 x i16> [[R]]
;
  %x0 = bitcast <4 x i32> %a0 to <8 x i16>
  %x1 = bitcast <4 x i32> %a1 to <8 x i16>
  %r = shufflevector <8 x i16> %x0, <8 x i16> %x1, <16 x i32> <i32 1, i32 0, i32 3, i32 3, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  ret <16 x i16> %r
}

; negative - bitcasts (unscalable element counts)

define <4 x i32> @shuffle_bitcast_v32i40_v4i32(<32 x i40> %a0, <32 x i40> %a1) {
; CHECK-LABEL: define <4 x i32> @shuffle_bitcast_v32i40_v4i32(
; CHECK-SAME: <32 x i40> [[A0:%.*]], <32 x i40> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = bitcast <32 x i40> [[A0]] to <40 x i32>
; CHECK-NEXT:    [[X1:%.*]] = bitcast <32 x i40> [[A1]] to <40 x i32>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <40 x i32> [[X0]], <40 x i32> [[X1]], <4 x i32> <i32 0, i32 42, i32 poison, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %x0 = bitcast <32 x i40> %a0 to <40 x i32>
  %x1 = bitcast <32 x i40> %a1 to <40 x i32>
  %r = shufflevector <40 x i32> %x0, <40 x i32> %x1, <4 x i32> <i32 0, i32 42, i32 poison, i32 poison>
  ret <4 x i32> %r
}

; negative - src type mismatch

define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) {
; CHECK-LABEL: define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(
; CHECK-SAME: <4 x i8> [[A0:%.*]], <4 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = sext <4 x i8> [[A0]] to <4 x i32>
; CHECK-NEXT:    [[X1:%.*]] = sext <4 x i16> [[A1]] to <4 x i32>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <8 x i32> [[R]]
;
  %x0 = sext <4 x i8> %a0 to <4 x i32>
  %x1 = sext <4 x i16> %a1 to <4 x i32>
  %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %r
}

; negative - castop mismatch

define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i32> @concat_sext_zext_v8i16_v16i32(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32>
; CHECK-NEXT:    [[X1:%.*]] = zext <8 x i16> [[A1]] to <8 x i32>
; CHECK-NEXT:    [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x i32> [[R]]
;
  %x0 = sext <8 x i16> %a0 to <8 x i32>
  %x1 = zext <8 x i16> %a1 to <8 x i32>
  %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}