; llvm/llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Elements 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and sign-extended afterwards. The vector
; operand is the first mul operand. The expected assembly is a single
; vmullb.s32 whose splat operand is built from r0.
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and sign-extended afterwards. Here the
; splat is the first mul operand. The expected assembly is a single
; vmullb.s32 with the splat register as its first source.
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q1, q2, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is sign-extended to i64 as a scalar and only then splatted. The vector
; operand is the first mul operand. The expected assembly contains no vmullb:
; each 64-bit product is formed with scalar umull/mla and moved back into q0.
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r2, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    mla r0, r3, r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is sign-extended to i64 as a scalar and only then splatted. Here the
; splat is the first mul operand. The expected assembly contains no vmullb:
; each 64-bit product is formed with scalar umull/mla and moved back into q0.
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r0, r0, r2, r5
; CHECK-NEXT:    mla r0, r4, r3, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and sign-extended afterwards. The vector
; operand is the first mul operand. The expected assembly applies vrev64.32
; to the input vector and then uses a single vmullb.s32.
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
; CHECK-NEXT:    vrev64.32 q2, q0
; CHECK-NEXT:    vmullb.s32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and sign-extended afterwards. Here the
; splat is the first mul operand. The expected assembly applies vrev64.32
; to the input vector and then uses a single vmullb.s32.
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is sign-extended to i64 as a scalar and only then splatted. The vector
; operand is the first mul operand. The expected assembly contains no vmullb:
; after a vrev64.32 the products are formed with scalar umull/mla.
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r2, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    mla r0, r3, r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are sign-extended to i64 and multiplied by %src2,
; which is sign-extended to i64 as a scalar and only then splatted. Here the
; splat is the first mul operand. The expected assembly contains no vmullb:
; after a vrev64.32 the products are formed with scalar umull/mla.
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r0, r0, r2, r5
; CHECK-NEXT:    mla r0, r4, r3, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are sign-extended
; to i64 and multiplied by %src2, which is splatted as an i32 first and
; sign-extended afterwards. The vector operand is the first mul operand. The
; expected assembly uses two vmullb.s32 instructions, with d8/d9 saved and
; restored around them.
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmov.f32 s17, s4
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmullb.s32 q2, q4, q3
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.s32 q1, q0, q3
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are sign-extended
; to i64 and multiplied by %src2, which is splatted as an i32 first and
; sign-extended afterwards. Here the splat is the first mul operand. The
; expected assembly uses two vmullb.s32 instructions, with d8/d9 saved and
; restored around them.
define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmov.f32 s17, s4
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmullb.s32 q2, q3, q4
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.s32 q1, q3, q0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are sign-extended
; to i64 and multiplied by %src2, which is sign-extended to i64 as a scalar and
; only then splatted. The vector operand is the first mul operand. The expected
; assembly contains no vmullb: all four products are formed with scalar
; umull/mla and assembled into q0 and q1.
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov.f32 s4, s1
; CHECK-NEXT:    vmov.f32 s6, s3
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    vmov q1[2], q1[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r5, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    mla r3, r3, r0, r5
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    umull r3, r5, r1, r0
; CHECK-NEXT:    mla r5, r1, r2, r5
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r12, r1, r0, r5
; CHECK-NEXT:    vmov r5, s0
; CHECK-NEXT:    umull r4, r1, r5, r0
; CHECK-NEXT:    mla r1, r5, r2, r1
; CHECK-NEXT:    asrs r2, r5, #31
; CHECK-NEXT:    vmov q0[2], q0[0], r4, r3
; CHECK-NEXT:    mla r0, r2, r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are sign-extended
; to i64 and multiplied by %src2, which is sign-extended to i64 as a scalar and
; only then splatted. Here the splat is the first mul operand. The expected
; assembly contains no vmullb: all four products are formed with scalar
; umull/mla and assembled into q0 and q1.
define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov.f32 s4, s1
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov.f32 s6, s3
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    vmov q1[2], q1[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r2, r0, r2, r5
; CHECK-NEXT:    mla r2, r4, r3, r2
; CHECK-NEXT:    vmov q1[3], q1[1], r2, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    umull r2, r3, r0, r1
; CHECK-NEXT:    asrs r5, r1, #31
; CHECK-NEXT:    mla r3, r0, r5, r3
; CHECK-NEXT:    mla r12, r4, r1, r3
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull r5, r1, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r5, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r0, r0, r2, r1
; CHECK-NEXT:    mla r0, r4, r3, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; Elements 0 and 2 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and zero-extended afterwards. The vector
; operand is the first mul operand. The expected assembly is a single
; vmullb.u32 whose splat operand is built from r0.
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and zero-extended afterwards. Here the
; splat is the first mul operand. The expected assembly is a single
; vmullb.u32 with the splat register as its first source.
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q1, q2, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is zero-extended to i64 as a scalar and only then splatted. The vector
; operand is the first mul operand. The expected assembly contains no vmullb:
; two scalar umull instructions produce the results, which are moved into q0.
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Elements 0 and 2 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is zero-extended to i64 as a scalar and only then splatted. Here the
; splat is the first mul operand. The expected assembly contains no vmullb:
; two scalar umull instructions produce the results, which are moved into q0.
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and zero-extended afterwards. The vector
; operand is the first mul operand. The expected assembly applies vrev64.32
; to the input vector and then uses a single vmullb.u32.
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
; CHECK-NEXT:    vrev64.32 q2, q0
; CHECK-NEXT:    vmullb.u32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is splatted as an i32 first and zero-extended afterwards. Here the
; splat is the first mul operand. The expected assembly applies vrev64.32
; to the input vector and then uses a single vmullb.u32.
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is zero-extended to i64 as a scalar and only then splatted. The vector
; operand is the first mul operand. The expected assembly contains no vmullb:
; after a vrev64.32, two scalar umull instructions produce the results.
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Elements 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is zero-extended to i64 as a scalar and only then splatted. Here the
; splat is the first mul operand. The expected assembly contains no vmullb:
; after a vrev64.32, two scalar umull instructions produce the results.
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are zero-extended
; to i64 and multiplied by %src2, which is splatted as an i32 first and
; zero-extended afterwards. The vector operand is the first mul operand. The
; expected assembly uses two vmullb.u32 instructions, with d8/d9 saved and
; restored around them.
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmov.f32 s17, s4
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmullb.u32 q2, q4, q3
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.u32 q1, q0, q3
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are zero-extended
; to i64 and multiplied by %src2, which is splatted as an i32 first and
; zero-extended afterwards. Here the splat is the first mul operand. The
; expected assembly uses two vmullb.u32 instructions, with d8/d9 saved and
; restored around them.
define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmov.f32 s17, s4
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmullb.u32 q2, q3, q4
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.u32 q1, q3, q0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are zero-extended
; to i64 and multiplied by %src2, which is zero-extended to i64 as a scalar and
; only then splatted. The vector operand is the first mul operand. The expected
; assembly contains no vmullb: four scalar umull instructions produce the
; results, which are assembled into q0 (via q2) and q1.
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    umull r1, r12, r1, r0
; CHECK-NEXT:    umull r3, r2, r3, r0
; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; Elements 0, 2, 1, 3 (in that order) of the <8 x i32> %src1 are zero-extended
; to i64 and multiplied by %src2, which is zero-extended to i64 as a scalar and
; only then splatted. Here the splat is the first mul operand. The expected
; assembly contains no vmullb: four scalar umull instructions produce the
; results, which are assembled into q0 (via q2) and q1.
define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    umull r1, r12, r0, r1
; CHECK-NEXT:    umull r3, r2, r0, r3
; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and sign-extended
; afterwards. The vector operand is the first mul operand. The expected
; assembly is a vdup.32 of r0 followed by a single vmullb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and sign-extended
; afterwards. Here the splat is the first mul operand. The expected assembly
; is a vdup.32 of r0 followed by a single vmullb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is sign-extended to i32 as a scalar and only then
; splatted. The vector operand is the first mul operand. The expected assembly
; contains no vmullb: it is vmovlb.s16, sxth, and a vmul.i32 that takes r0 as
; its scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is sign-extended to i32 as a scalar and only then
; splatted. Here the splat is the first mul operand. The expected assembly
; contains no vmullb: it is vmovlb.s16, sxth, and a vmul.i32 that takes r0 as
; its scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Elements 1, 3, 5, 7 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and sign-extended
; afterwards. The vector operand is the first mul operand. The expected
; assembly applies vrev32.16 to the input and then uses a single vmullb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Elements 1, 3, 5, 7 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and sign-extended
; afterwards. Here the splat is the first mul operand. The expected assembly
; applies vrev32.16 to the input and then uses a single vmullb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Elements 1, 3, 5, 7 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is sign-extended to i32 as a scalar and only then
; splatted. The vector operand is the first mul operand. The expected assembly
; contains no vmullb: it is vmovlt.s16, sxth, and a vmul.i32 that takes r0 as
; its scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Elements 1, 3, 5, 7 of the <8 x i16> %src1 are sign-extended to i32 and
; multiplied by %src2, which is sign-extended to i32 as a scalar and only then
; splatted. Here the splat is the first mul operand. The expected assembly
; contains no vmullb: it is vmovlt.s16, sxth, and a vmul.i32 that takes r0 as
; its scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Elements 0, 2, 4, 6, 1, 3, 5, 7 (in that order) of the <16 x i16> %src1 are
; sign-extended to i32 and multiplied by %src2, which is splatted as an i16
; first and sign-extended afterwards. The vector operand is the first mul
; operand. The expected assembly is vdup.16 of r0, a vrev32.16 of the input,
; and two vmullb.s16 instructions.
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmullb.s16 q1, q1, q2
; CHECK-NEXT:    vmullb.s16 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out1, %out2
  ret <8 x i32> %out
}

; Elements 0, 2, 4, 6, 1, 3, 5, 7 (in that order) of the <16 x i16> %src1 are
; sign-extended to i32 and multiplied by %src2, which is splatted as an i16
; first and sign-extended afterwards. Here the splat is the first mul operand.
; The expected assembly is a vrev32.16 of the input, vdup.16 of r0, and two
; vmullb.s16 instructions.
define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vmullb.s16 q1, q2, q1
; CHECK-NEXT:    vmullb.s16 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out2, %out1
  ret <8 x i32> %out
}

; Elements 0, 2, 4, 6, 1, 3, 5, 7 (in that order) of the <16 x i16> %src1 are
; sign-extended to i32 and multiplied by %src2, which is sign-extended to i32
; as a scalar and only then splatted. The vector operand is the first mul
; operand. The expected assembly contains no vmullb: it widens with
; vmovlb.s16 / vmovlt.s16 and multiplies each half by r0 with vmul.i32.
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q1, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %out1, %shuf2
  ret <8 x i32> %out
}

; Elements 0, 2, 4, 6, 1, 3, 5, 7 (in that order) of the <16 x i16> %src1 are
; sign-extended to i32 and multiplied by %src2, which is sign-extended to i32
; as a scalar and only then splatted. Here the splat is the first mul operand.
; The expected assembly contains no vmullb: it widens with vmovlb.s16 /
; vmovlt.s16 and multiplies each half by r0 with vmul.i32.
define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q1, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %shuf2, %out1
  ret <8 x i32> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are zero-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and zero-extended
; afterwards. The vector operand is the first mul operand. The expected
; assembly is a vdup.32 of r0 followed by a single vmullb.u16.
define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02468101214_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Elements 0, 2, 4, 6 of the <8 x i16> %src1 are zero-extended to i32 and
; multiplied by %src2, which is splatted as an i16 first and zero-extended
; afterwards. Here the splat is the first mul operand. The expected assembly
; is a vdup.32 of r0 followed by a single vmullb.u16.
define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Zero-extends lanes <0,2,4,6> of %src1 to i32 and multiplies them by a splat
; of %src2, where the scalar is zero-extended before it is splatted (splat is
; the right-hand mul operand).
; The assertions expect vmovlb.u16 for the widening, uxth on the scalar, and a
; vmul.i32 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 14, but the mask only
; selects 0,2,4,6.
define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02468101214_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Zero-extends lanes <0,2,4,6> of %src1 to i32 and multiplies them by a splat
; of %src2, where the scalar is zero-extended before it is splatted (splat is
; the left-hand mul operand).
; The assertions expect vmovlb.u16 for the widening, uxth on the scalar, and a
; vmul.i32 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 14, but the mask only
; selects 0,2,4,6.
define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Zero-extends lanes <1,3,5,7> of %src1 to i32 and multiplies them by a splat
; of %src2 that is built as i16 and zero-extended afterwards (splat is the
; right-hand mul operand).
; The assertions expect vdup.32 of r0, a vrev32.16 of the vector input, and a
; single vmullb.u16.
; NOTE(review): the function name lists lanes up to 15, but the mask only
; selects 1,3,5,7.
define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_13579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vmullb.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Zero-extends lanes <1,3,5,7> of %src1 to i32 and multiplies them by a splat
; of %src2 that is built as i16 and zero-extended afterwards (splat is the
; left-hand mul operand).
; The assertions expect a vrev32.16 of the vector input, vdup.32 of r0, and a
; single vmullb.u16 whose first source is the splat register.
; NOTE(review): the function name lists lanes up to 15, but the mask only
; selects 1,3,5,7.
define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Zero-extends lanes <1,3,5,7> of %src1 to i32 and multiplies them by a splat
; of %src2, where the scalar is zero-extended before it is splatted (splat is
; the right-hand mul operand).
; The assertions expect vmovlt.u16 for the widening, uxth on the scalar, and a
; vmul.i32 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 15, but the mask only
; selects 1,3,5,7.
define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_13579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Zero-extends lanes <1,3,5,7> of %src1 to i32 and multiplies them by a splat
; of %src2, where the scalar is zero-extended before it is splatted (splat is
; the left-hand mul operand).
; The assertions expect vmovlt.u16 for the widening, uxth on the scalar, and a
; vmul.i32 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 15, but the mask only
; selects 1,3,5,7.
define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Zero-extends lanes <0,2,4,6,1,3,5,7> of %src1 to i32 and multiplies them by a
; splat of %src2 that is built as i16 and zero-extended afterwards (splat is
; the right-hand mul operand). The result occupies two q registers.
; The assertions expect one vdup.16 of r0, one vrev32.16 copy of the input,
; and two vmullb.u16 (one on the reversed copy, one on the original).
define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02461357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmullb.u16 q1, q1, q2
; CHECK-NEXT:    vmullb.u16 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out1, %out2
  ret <8 x i32> %out
}

; Zero-extends lanes <0,2,4,6,1,3,5,7> of %src1 to i32 and multiplies them by a
; splat of %src2 that is built as i16 and zero-extended afterwards (splat is
; the left-hand mul operand). The result occupies two q registers.
; The assertions expect one vrev32.16 copy of the input, one vdup.16 of r0,
; and two vmullb.u16 whose first source is the splat register.
define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vmullb.u16 q1, q2, q1
; CHECK-NEXT:    vmullb.u16 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out2, %out1
  ret <8 x i32> %out
}

; Zero-extends lanes <0,2,4,6,1,3,5,7> of %src1 to i32 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the right-hand mul operand). The result occupies two q registers.
; The assertions expect vmovlb.u16/vmovlt.u16 for the widening, uxth on the
; scalar, and one vmul.i32 by r0 for each half of the result.
define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02461357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %out1, %shuf2
  ret <8 x i32> %out
}

; Zero-extends lanes <0,2,4,6,1,3,5,7> of %src1 to i32 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the left-hand mul operand). The result occupies two q registers.
; The assertions expect vmovlb.u16/vmovlt.u16 for the widening, uxth on the
; scalar, and one vmul.i32 by r0 for each half of the result.
define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %shuf2, %out1
  ret <8 x i32> %out
}

; Sign-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and sign-extended afterwards (splat is the
; right-hand mul operand).
; The assertions expect a vdup.16 of r0 feeding a single vmullb.s8.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Sign-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and sign-extended afterwards (splat is the
; left-hand mul operand).
; The assertions expect a vdup.16 of r0 feeding a single vmullb.s8 whose first
; source is the splat register.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Sign-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is sign-extended before it is splatted
; (splat is the right-hand mul operand).
; The assertions expect vmovlb.s8 for the widening, sxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Sign-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is sign-extended before it is splatted
; (splat is the left-hand mul operand).
; The assertions expect vmovlb.s8 for the widening, sxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Sign-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and sign-extended afterwards (splat is the
; right-hand mul operand).
; The assertions expect vdup.16 of r0, a vrev16.8 of the vector input, and a
; single vmullb.s8.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vmullb.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Sign-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and sign-extended afterwards (splat is the
; left-hand mul operand).
; The assertions expect a vrev16.8 of the vector input, vdup.16 of r0, and a
; single vmullb.s8 whose first source is the splat register.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Sign-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is sign-extended before it is splatted
; (splat is the right-hand mul operand).
; The assertions expect vmovlt.s8 for the widening, sxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Sign-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is sign-extended before it is splatted
; (splat is the left-hand mul operand).
; The assertions expect vmovlt.s8 for the widening, sxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Sign-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2 that is built as i8 and sign-extended afterwards
; (splat is the right-hand mul operand). The result occupies two q registers.
; The assertions expect one vdup.8 of r0, one vrev16.8 copy of the input, and
; two vmullb.s8 (one on the reversed copy, one on the original).
define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0246810121413579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmullb.s8 q1, q1, q2
; CHECK-NEXT:    vmullb.s8 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out1, %out2
  ret <16 x i16> %out
}

; Sign-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2 that is built as i8 and sign-extended afterwards
; (splat is the left-hand mul operand). The result occupies two q registers.
; The assertions expect one vrev16.8 copy of the input, one vdup.8 of r0, and
; two vmullb.s8 whose first source is the splat register.
define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vmullb.s8 q1, q2, q1
; CHECK-NEXT:    vmullb.s8 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out2, %out1
  ret <16 x i16> %out
}

; Sign-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2, where the scalar is sign-extended before it is
; splatted (splat is the right-hand mul operand). The result occupies two q
; registers.
; The assertions expect vmovlb.s8/vmovlt.s8 for the widening, sxtb on the
; scalar, and one vmul.i16 by r0 for each half of the result.
define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0246810121413579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q1, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %out1, %shuf2
  ret <16 x i16> %out
}

; Sign-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2, where the scalar is sign-extended before it is
; splatted (splat is the left-hand mul operand). The result occupies two q
; registers.
; The assertions expect vmovlb.s8/vmovlt.s8 for the widening, sxtb on the
; scalar, and one vmul.i16 by r0 for each half of the result.
define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q1, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %shuf2, %out1
  ret <16 x i16> %out
}

; Zero-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and zero-extended afterwards (splat is the
; right-hand mul operand).
; The assertions expect a vdup.16 of r0 feeding a single vmullb.u8.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Zero-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and zero-extended afterwards (splat is the
; left-hand mul operand).
; The assertions expect a vdup.16 of r0 feeding a single vmullb.u8 whose first
; source is the splat register.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Zero-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the right-hand mul operand).
; The assertions expect vmovlb.u8 for the widening, uxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Zero-extends lanes <0,2,...,14> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the left-hand mul operand).
; The assertions expect vmovlb.u8 for the widening, uxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 30, but the mask only
; selects the even lanes 0..14.
define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Zero-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and zero-extended afterwards (splat is the
; right-hand mul operand).
; The assertions expect vdup.16 of r0, a vrev16.8 of the vector input, and a
; single vmullb.u8.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vmullb.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Zero-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2 that is built as i8 and zero-extended afterwards (splat is the
; left-hand mul operand).
; The assertions expect a vrev16.8 of the vector input, vdup.16 of r0, and a
; single vmullb.u8 whose first source is the splat register.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Zero-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the right-hand mul operand).
; The assertions expect vmovlt.u8 for the widening, uxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Zero-extends lanes <1,3,...,15> of %src1 to i16 and multiplies them by a
; splat of %src2, where the scalar is zero-extended before it is splatted
; (splat is the left-hand mul operand).
; The assertions expect vmovlt.u8 for the widening, uxtb on the scalar, and a
; vmul.i16 that takes r0 as its scalar operand.
; NOTE(review): the function name lists lanes up to 31, but the mask only
; selects the odd lanes 1..15.
define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Zero-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2 that is built as i8 and zero-extended afterwards
; (splat is the right-hand mul operand). The result occupies two q registers.
; The assertions expect one vdup.8 of r0, one vrev16.8 copy of the input, and
; two vmullb.u8 (one on the reversed copy, one on the original).
define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0246810121413579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmullb.u8 q1, q1, q2
; CHECK-NEXT:    vmullb.u8 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out1, %out2
  ret <16 x i16> %out
}

; Zero-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2 that is built as i8 and zero-extended afterwards
; (splat is the left-hand mul operand). The result occupies two q registers.
; The assertions expect one vrev16.8 copy of the input, one vdup.8 of r0, and
; two vmullb.u8 whose first source is the splat register.
define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vmullb.u8 q1, q2, q1
; CHECK-NEXT:    vmullb.u8 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Splat %src2 while still narrow; the extend below is applied to the splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out2, %out1
  ret <16 x i16> %out
}

; Zero-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2, where the scalar is zero-extended before it is
; splatted (splat is the right-hand mul operand). The result occupies two q
; registers.
; The assertions expect vmovlb.u8/vmovlt.u8 for the widening, uxtb on the
; scalar, and one vmul.i16 by r0 for each half of the result.
define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0246810121413579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %out1, %shuf2
  ret <16 x i16> %out
}

; Zero-extends lanes <0,2,...,14,1,3,...,15> of %src1 to i16 and multiplies
; them by a splat of %src2, where the scalar is zero-extended before it is
; splatted (splat is the left-hand mul operand). The result occupies two q
; registers.
; The assertions expect vmovlb.u8/vmovlt.u8 for the widening, uxtb on the
; scalar, and one vmul.i16 by r0 for each half of the result.
define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Extend the scalar first, then splat it directly in the result type.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %shuf2, %out1
  ret <16 x i16> %out
}