llvm/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P9

; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9

; Shuffles a vector built from a 2-byte load (%load0) with a vector built by
; inserting %arg into %a (%load1). Only the first two result lanes are defined
; by the mask; the result is stored through an undef pointer. %a1 is unused.
; The expected assembly below is autogenerated (see the note at the top of the
; file); regenerate it with the update script instead of editing it by hand.
define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C0(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C0(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand: two bytes loaded from %a0, reinterpreted as one i16 and
  ; placed in lane 0 of %b.
  %load0.tmp = load <2 x i8>, ptr %a0
  %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16
  %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0
  ; Second operand: %a with %arg written to byte lane 0, viewed as <8 x i16>.
  %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0
  %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16>
  ; Mask index 9 selects lane 1 of %load1; index 0 selects lane 0 of %load0.
  ; The remaining six result lanes are undef.
  %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> <i32 9, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <8 x i16> %shuff, ptr undef
  ret void
}

; Builds the same two operands as a 2-byte load inserted into %b (%load0) and
; %arg inserted into %a (%load1), then shuffles them with a fully defined mask:
; lane 0 comes from %load0 and lanes 1-7 come from lanes 0-6 of %load1.
; The result is stored through an undef pointer. %a1 is unused.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) {
; CHECK-LE-P8-LABEL: test_v8i16_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrd v4, r9
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI1_1@toc@l
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-P9-NEXT:    mtvsrwz v3, r9
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-P9-NEXT:    vinsertb v2, v3, 15
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm vs0, v2, vs1
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r9
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI1_1@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI1_1@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P9-NEXT:    mtvsrwz v3, r9
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r5
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C2(r2) # %const.1
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r5
; CHECK-AIX-64-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r5
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C2(r2) # %const.1
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    mtvsrwz v3, r5
; CHECK-AIX-32-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand: two bytes loaded from %a0, reinterpreted as one i16 and
  ; placed in lane 0 of %b.
  %load0.tmp = load <2 x i8>, ptr %a0
  %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16
  %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0
  ; Second operand: %a with %arg written to byte lane 0, viewed as <8 x i16>.
  %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0
  %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16>
  ; Mask index 0 selects lane 0 of %load0; indices 8-14 select lanes 0-6 of
  ; %load1.
  %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  store <8 x i16> %shuff, ptr undef
  ret void
}

; Shuffles a <2 x i16> loaded from %ptr with a <2 x i16> built from a single
; inserted byte (%v3), swapping the two lanes of each operand, and stores the
; resulting <4 x i16> through an undef pointer. %ptr2 is unused.
; Attribute group #0 is referenced here; its definition is not shown in this
; part of the file.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 {
; CHECK-LE-P8-LABEL: test_none_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrd v3, r5
; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r3
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI2_1@toc@l
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    vperm v2, v3, v3, v2
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stfdx f0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT:    mtvsrd v3, r5
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_1@toc@l
; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v3, v3, vs0
; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P9-NEXT:    xxswapd vs0, v2
; CHECK-LE-P9-NEXT:    stfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r5
; CHECK-BE-P8-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-BE-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI2_1@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    stxsdx v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT:    mtvsrwz v3, r5
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_1@toc@l
; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v3, v3, vs0
; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P9-NEXT:    stxsd v2, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C3(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r5
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C4(r2) # %const.1
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    stxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r5
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.1
; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v3, v3, vs0
; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P9-NEXT:    stxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    stb r5, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    vmrghh v3, v3, v3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, -12(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    lwz r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    stb r5, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
; CHECK-AIX-32-P9-NEXT:    vmrghh v3, v3, v3
; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P9-NEXT:    stxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, -12(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    lwz r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand: a 4-byte vector load from %ptr.
  %0 = load <2 x i16>, ptr %ptr, align 4
  ; Second operand: %v3 in byte lane 0 of an otherwise undef <4 x i8>, viewed
  ; as <2 x i16>.
  %tmp = insertelement <4 x i8> undef, i8 %v3, i32 0
  %tmp0 = bitcast <4 x i8> %tmp to <2 x i16>
  ; Mask <1, 0, 3, 2>: lanes 1,0 of %0 followed by lanes 1,0 of %tmp0.
  %1 = shufflevector <2 x i16> %0, <2 x i16> %tmp0, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x i16> %1, ptr undef, align 4
  ret void
}

; Loads two <2 x i16> vectors, shuffles them with a mask that only references
; lanes of the first one, zero-extends the <4 x i16> result to <4 x i32>, and
; stores it through an undef pointer.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
; CHECK-LE-P8-LABEL: test_v4i32_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-LE-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; CHECK-BE-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both loads are byte-aligned (align 1).
  %0 = load <2 x i16>, ptr %ptr1, align 1
  %1 = load <2 x i16>, ptr %ptr2, align 1
  ; Mask <1, 0, 1, 0> uses only indices 0 and 1, i.e. lanes of %0; no lane of
  ; %1 is selected.
  %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
  %2 = zext <4 x i16> %shuffle1 to <4 x i32>
  store <4 x i32> %2, ptr undef, align 16
  ret void
}

; Loads a <4 x i16> from %ptr1 and a <4 x i32> from %ptr2, truncates the latter
; to <4 x i16>, shuffles the two, zero-extends the result to <4 x i32>, and
; stores it through an undef pointer.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
; CHECK-LE-P8-NEXT:    lxvd2x v4, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI4_1@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI4_1@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI4_1@toc@l
; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C5(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both loads are byte-aligned (align 1).
  %0 = load <4 x i16>, ptr %ptr1, align 1
  %1 = load <4 x i32>, ptr %ptr2, align 1
  ; Narrow the second operand so both shuffle inputs are <4 x i16>.
  %bc = trunc <4 x i32> %1 to <4 x i16>
  ; Mask <4, 5, 1, 0>: lanes 0,1 of %bc followed by lanes 1,0 of %0.
  %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %bc, <4 x i32> <i32 4, i32 5, i32 1, i32 0>
  %2 = zext <4 x i16> %shuffle1 to <4 x i32>
  store <4 x i32> %2, ptr undef, align 16
  ret void
}

; Loads a <4 x i16> from %ptr1, reverses its lanes with a single-input shuffle
; (the second operand is undef), zero-extends the result to <4 x i32>, and
; stores it through an undef pointer.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
; CHECK-LE-P8-LABEL: test_v2i64_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-LE-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-LE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-BE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-BE-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-BE-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C7(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-AIX-64-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    li r4, 4
; CHECK-AIX-32-P8-NEXT:    lfiwzx f1, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, r3, r4
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C6(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw vs1, vs1, 1
; CHECK-AIX-32-P8-NEXT:    xxspltw vs0, vs0, 1
; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    li r4, 4
; CHECK-AIX-32-P9-NEXT:    lxvwsx vs1, 0, r3
; CHECK-AIX-32-P9-NEXT:    xxlxor vs2, vs2, vs2
; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, r3, r4
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    xxmrghw vs0, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm vs0, vs2, vs1
; CHECK-AIX-32-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Byte-aligned (align 1) 8-byte vector load.
  %0 = load <4 x i16>, ptr %ptr1, align 1
  ; Mask <3, 2, 1, 0> reverses the four lanes of %0.
  %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %1 = zext <4 x i16> %shuffle1 to <4 x i32>
  store <4 x i32> %1, ptr undef, align 16
  ret void
}

; Loads a <2 x i8> from each of %a and %b and returns a <16 x i8> made of the
; 4-byte sequence (%load1, %load2) repeated four times, built with two
; widening shuffles.
; The expected assembly below is autogenerated; regenerate it with the update
; script instead of editing it by hand.
define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
; CHECK-LE-P8-LABEL: test_v8i16_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
; CHECK-LE-P8-NEXT:    lhz r3, 0(r4)
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
; CHECK-LE-P9-NEXT:    lxsihzx f0, 0, r4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI6_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
; CHECK-BE-P8-NEXT:    lhz r3, 0(r4)
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r4
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI6_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r4)
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C8(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r4)
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C7(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsihzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C6(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsihzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  %load1 = load <2 x i8>, ptr %a
  %load2 = load <2 x i8>, ptr %b
  ; Mask <0, 1, 2, 3, 0, 1, 2, 3>: indices 0-1 are %load1 and 2-3 are %load2,
  ; so the <8 x i8> result is (%load1, %load2) written twice.
  %shuffle1 = shufflevector <2 x i8> %load1, <2 x i8> %load2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ; Identity mask 0-15 over (%shuffle1, %shuffle1): concatenates %shuffle1
  ; with itself to form the <16 x i8> return value.
  %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle2
}

; test_v8i16_v4i32: byte shuffle of two vectors that each carry one i16 in
; lane 0. The %a load has no explicit alignment; the %b load is align 4.
; In the assertions below every configuration loads %a as a halfword
; (lhz / lxsihzx) and %b as a word (lfiwzx / lxvwsx), and combines the two with
; a halfword merge (vmrglh on little-endian Linux, vmrghh on big-endian/AIX).
define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-BE-P9-NEXT:    xxsldwi v3, f0, f0, 1
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-64-P9-NEXT:    xxsldwi v3, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a become an i16 placed in lane 0 of a poison <8 x i16>.
  %0 = load <2 x i8>, ptr %a
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 4) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 4
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Result lanes 0-1 are bytes 0-1 of %2 (from %a); lanes 2-9 are bytes 0-7 of
  ; %3 (from %b, mask indices 16-23); lanes 10-15 are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; test_v8i16_v2i64: byte shuffle of two vectors that each carry one i16 in
; lane 0. The %a load has no explicit alignment; the %b load is align 8.
; In the assertions below %a is loaded as a halfword (lhz / lxsihzx) in every
; configuration. %b is loaded as a doubleword (lfdx / lfd / lxsdx / lxsd) on
; the 64-bit targets and as a word (lfiwzx / lxvwsx) on 32-bit AIX. The two
; are combined with vmrglh (little-endian Linux) or vmrghh (big-endian/AIX).
define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a become an i16 placed in lane 0 of a poison <8 x i16>.
  %0 = load <2 x i8>, ptr %a
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 8) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Result lanes 0-1 are bytes 0-1 of %2 (from %a); lanes 2-9 are bytes 0-7 of
  ; %3 (from %b, mask indices 16-23); lanes 10-15 are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; test_v4i32_v4i32: shuffles two <2 x i16> values (each loaded with align 1)
; into a <4 x i16>, zero-extends it to <4 x i32> and stores the result.
; In the assertions below every configuration loads both inputs as words
; (lxsiwzx / lfiwzx) and permutes them with a constant-pool mask
; (vperm / xxperm). The permuted value is then combined with a register zeroed
; by xxlxor: through a second vperm on little-endian Linux and through vmrghh
; on big-endian Linux and AIX.
define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI9_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
; CHECK-LE-P8-NEXT:    lxsiwzx v4, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI9_0@toc@l
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI9_1@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI9_1@toc@l
; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C9(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C8(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C8(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C7(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both inputs are 4-byte <2 x i16> vectors read with align 1.
  %0 = load <2 x i16>, ptr %ptr1, align 1
  %1 = load <2 x i16>, ptr %ptr2, align 1
  ; Mask <2, 3, 1, 0>: both elements of %1 in order, then elements 1 and 0 of
  ; %0.
  %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
  %2 = zext <4 x i16> %shuffle1 to <4 x i32>
  ; NOTE(review): the destination is `ptr undef`; presumably the store exists
  ; only to keep the value live - confirm before reusing this pattern.
  store <4 x i32> %2, ptr undef, align 16
  ret void
}

; test_v4i32_v8i16: byte shuffle of two vectors that each carry one i16 in
; lane 0, with the %b-derived vector as the first shuffle operand and the
; %a-derived vector as the second. The %a load has no explicit alignment; the
; %b load is align 4.
; In the assertions below every configuration loads %a as a halfword
; (lhz / lxsihzx) and %b as a word (lfiwzx / lxvwsx), and combines the two with
; a halfword merge (vmrglh on little-endian Linux, vmrghh on big-endian/AIX).
define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-BE-P9-NEXT:    xxsldwi v3, f0, f0, 1
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-64-P9-NEXT:    xxsldwi v3, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a become an i16 placed in lane 0 of a poison <8 x i16>.
  %0 = load <2 x i8>, ptr %a
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 4) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 4
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Operands are (%3, %2): result lanes 0-1 are bytes 0-1 of %3 (from %b);
  ; lanes 2-9 are bytes 0-7 of %2 (from %a, mask indices 16-23); lanes 10-15
  ; are undef.
  %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; test_v4i32_v2i64: byte shuffle of two vectors that each carry one i16 in
; lane 0. The %a load is align 4 and the %b load is align 8.
; In the assertions below the 64-bit targets load %a as a word (lfiwzx) and %b
; as a doubleword (lfdx / lfd / lxsdx / lxsd), then combine them with vmrglh
; (little-endian Linux) or vmrghh (big-endian Linux, 64-bit AIX). 32-bit AIX
; instead loads both inputs as words (lxsiwzx / lfiwzx) and permutes them with
; a constant-pool mask (vperm / xxperm).
define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, f0
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd v2, f0
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C8(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a (align 4) become an i16 placed in lane 0 of a poison
  ; <8 x i16>.
  %0 = load <2 x i8>, ptr %a, align 4
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 8) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Result lanes 0-1 are bytes 0-1 of %2 (from %a); lanes 2-9 are bytes 0-7 of
  ; %3 (from %b, mask indices 16-23); lanes 10-15 are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; test_v2i64_v2i64: shuffles two <4 x i16> values (each loaded with align 1)
; into a <4 x i16>, zero-extends it to <4 x i32> and stores the result.
; In the assertions below the 64-bit targets load both inputs as doublewords
; (lxsdx / lxsd / lfd) while 32-bit AIX loads them as words
; (lxsiwzx / lfiwzx). Every configuration permutes the inputs with a
; constant-pool mask (vperm / xxperm) and then combines the result with a
; register zeroed by xxlxor: through a second vperm on little-endian Linux and
; through vmrghh on big-endian Linux and AIX.
define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI12_0@toc@ha
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI12_1@toc@ha
; CHECK-LE-P8-NEXT:    lxsdx v4, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI12_0@toc@l
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI12_1@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    vperm v2, v3, v4, v2
; CHECK-LE-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd vs0, v2
; CHECK-LE-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfd f0, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
; CHECK-LE-P9-NEXT:    lxsd v2, 0(r4)
; CHECK-LE-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI12_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI12_1@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI12_1@toc@l
; CHECK-LE-P9-NEXT:    lxv v3, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI12_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI12_0@toc@ha
; CHECK-BE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-BE-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI12_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    stxv v2, 0(r3)
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lfd f0, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C10(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    stxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C9(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT:    xxlxor v3, v3, v3
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both inputs are 8-byte <4 x i16> vectors read with align 1.
  %0 = load <4 x i16>, ptr %ptr1, align 1
  %1 = load <4 x i16>, ptr %ptr2, align 1
  ; Mask <4, 5, 1, 0>: elements 0 and 1 of %1, then elements 1 and 0 of %0.
  ; Only the first two halfwords of each input are used.
  %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 4, i32 5, i32 1, i32 0>
  %2 = zext <4 x i16> %shuffle1 to <4 x i32>
  ; NOTE(review): the destination is `ptr undef`; presumably the store exists
  ; only to keep the value live - confirm before reusing this pattern.
  store <4 x i32> %2, ptr undef, align 16
  ret void
}

; test_v2i64_v4i32: byte shuffle of two vectors that each carry one i16 in
; lane 0, with the %b-derived vector as the first shuffle operand and the
; %a-derived vector as the second. The %a load is align 4 and the %b load is
; align 8.
; In the assertions below the 64-bit targets load %a as a word (lfiwzx) and %b
; as a doubleword (lfdx / lfd / lxsdx / lxsd), then combine them with vmrglh
; (little-endian Linux) or vmrghh (big-endian Linux, 64-bit AIX). 32-bit AIX
; instead loads both inputs as words (lxsiwzx / lfiwzx) and permutes them with
; a constant-pool mask (vperm / xxperm).
define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, f0
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    xxswapd v2, f0
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C11(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C10(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a (align 4) become an i16 placed in lane 0 of a poison
  ; <8 x i16>.
  %0 = load <2 x i8>, ptr %a, align 4
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 8) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Operands are (%3, %2): result lanes 0-1 are bytes 0-1 of %3 (from %b);
  ; lanes 2-9 are bytes 0-7 of %2 (from %a, mask indices 16-23); lanes 10-15
  ; are undef.
  %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; test_v2i64_v8i16: byte shuffle of two vectors that each carry one i16 in
; lane 0, with the %b-derived vector as the first shuffle operand and the
; %a-derived vector as the second. The %a load has no explicit alignment; the
; %b load is align 8.
; In the assertions below %a is loaded as a halfword (lhz / lxsihzx) in every
; configuration. %b is loaded as a doubleword (lfdx / lfd / lxsdx / lxsd) on
; the 64-bit targets and as a word (lfiwzx / lxvwsx) on 32-bit AIX. The two
; are combined with vmrglh (little-endian Linux) or vmrghh (big-endian/AIX).
define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    lfdx f0, 0, r4
; CHECK-LE-P8-NEXT:    xxswapd v3, f0
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lhz r3, 0(r3)
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    vsplth v2, v2, 3
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsihzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    vsplth v2, v2, 3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lhz r3, 0(r3)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxvwsx v3, 0, r4
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Two bytes from %a become an i16 placed in lane 0 of a poison <8 x i16>.
  %0 = load <2 x i8>, ptr %a
  %bc1 = bitcast <2 x i8> %0 to i16
  %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
  ; Two bytes from %b (align 8) become an i16 placed in lane 0 of an undef
  ; <8 x i16>.
  ; NOTE(review): this base is undef while the one above is poison - confirm
  ; whether the difference is intentional.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Operands are (%3, %2): result lanes 0-1 are bytes 0-1 of %3 (from %b);
  ; lanes 2-9 are bytes 0-7 of %2 (from %a, mask indices 16-23); lanes 10-15
  ; are undef.
  %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}