; llvm/llvm/test/CodeGen/AArch64/insertshuffleload.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s

; Positive case, insert at the front: an <8 x i8> vector is loaded from %p+1,
; shifted up by one lane, and the byte loaded from %p is inserted into lane 0.
; The result lanes are the eight consecutive bytes starting at %p, and the
; expected output is a single 8-byte load from [x0].
define <8 x i8> @inserti8_first(ptr %p) {
; CHECK-LABEL: inserti8_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  ; Lane i (i >= 1) takes %l1[i-1]; lane 0 is left undefined for the insert.
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Positive case, insert at the back: an <8 x i8> vector is loaded from %p,
; shifted down by one lane, and the byte loaded from %p+8 is inserted into
; lane 7. The result lanes are the eight consecutive bytes starting at %p+1,
; and the expected output is a single unscaled 8-byte load from [x0, #1].
define <8 x i8> @inserti8_last(ptr %p) {
; CHECK-LABEL: inserti8_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  ; Lane i (i <= 6) takes %l1[i+1]; lane 7 is left undefined for the insert.
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}

; Same front-insert pattern as the plain i8 case, but both the vector loaded
; from %p+1 and the scalar loaded from %p are sign-extended from i8 to i16
; before the shuffle and insert. The expected output is one 8-byte load from
; [x0] followed by a single vector sign-extend (sshll).
define <8 x i16> @inserti8_first_sext(ptr %p) {
; CHECK-LABEL: inserti8_first_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Back-insert pattern with sign extension: the vector loaded from %p and the
; scalar loaded from %p+8 are both sign-extended from i8 to i16 before the
; shuffle and the insert into lane 7. The expected output is one unscaled
; 8-byte load from [x0, #1] followed by a single vector sign-extend (sshll).
define <8 x i16> @inserti8_last_sext(ptr %p) {
; CHECK-LABEL: inserti8_last_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; Front-insert pattern with zero extension: the vector loaded from %p+1 and
; the scalar loaded from %p are both zero-extended from i8 to i16 before the
; shuffle and the insert into lane 0. The expected output is one 8-byte load
; from [x0] followed by a single vector zero-extend (ushll).
define <8 x i16> @inserti8_first_zext(ptr %p) {
; CHECK-LABEL: inserti8_first_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Back-insert pattern with zero extension: the vector loaded from %p and the
; scalar loaded from %p+8 are both zero-extended from i8 to i16 before the
; shuffle and the insert into lane 7. The expected output is one unscaled
; 8-byte load from [x0, #1] followed by a single vector zero-extend (ushll).
define <8 x i16> @inserti8_last_zext(ptr %p) {
; CHECK-LABEL: inserti8_last_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; Front-insert pattern on a 256-bit vector: <8 x i32> is loaded from %p+4
; (one i32 past %p), shifted up by one lane, and the i32 loaded from %p is
; inserted into lane 0. The expected output is a single load-pair of two
; q registers from [x0].
define <8 x i32> @inserti32_first(ptr %p) {
; CHECK-LABEL: inserti32_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
  ; The offset is in bytes (i8 element type), i.e. 4 bytes = one i32 lane.
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  ret <8 x i32> %ins
}

; Back-insert pattern on a 256-bit vector: <8 x i32> is loaded from %p,
; shifted down by one lane, and the i32 loaded from %p+32 is inserted into
; lane 7. The expected output is two unscaled q-register loads from
; [x0, #4] and [x0, #20], i.e. the 32 bytes starting at %p+4.
define <8 x i32> @inserti32_last(ptr %p) {
; CHECK-LABEL: inserti32_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    ldur q1, [x0, #20]
; CHECK-NEXT:    ret
  ; The offset is in bytes: 32 bytes = the full width of the <8 x i32> vector.
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  ret <8 x i32> %ins
}

; Front-insert pattern where the vector load %l1 has a second use: besides
; feeding the shuffle it is also an operand of the final add. The expected
; output contains both a load-pair from [x0] (the shifted-and-inserted value)
; and unscaled loads from [x0, #4] / [x0, #20] (the original %l1), followed by
; the two vector adds.
define <8 x i32> @inserti32_first_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_first_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q3, q2, [x0]
; CHECK-NEXT:    ldur q1, [x0, #20]
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  ; Second use of %l1, in addition to the shuffle above.
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; Back-insert pattern where the vector load %l1 has a second use: besides
; feeding the shuffle it is also an operand of the final add. The expected
; output contains both a load-pair from [x0] (the original %l1) and unscaled
; loads from [x0, #4] / [x0, #20] (the shifted-and-inserted value), followed
; by the two vector adds.
define <8 x i32> @inserti32_last_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_last_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ldur q2, [x0, #20]
; CHECK-NEXT:    ldur q3, [x0, #4]
; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  ; Second use of %l1, in addition to the shuffle above.
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; Front-insert pattern with floating-point elements: <4 x float> is loaded
; from %p+4, shifted up by one lane, and the float loaded from %p is inserted
; into lane 0. The expected output is a single 16-byte load from [x0].
define <4 x float> @insertf32_first(ptr %p) {
; CHECK-LABEL: insertf32_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <4 x float>, ptr %q
  %l2 = load float, ptr %p
  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  %ins = insertelement <4 x float> %s, float %l2, i32 0
  ret <4 x float> %ins
}

; Back-insert pattern with floating-point elements: <4 x float> is loaded
; from %p, shifted down by one lane, and the float loaded from %p+16 is
; inserted into lane 3. The expected output is a single unscaled 16-byte load
; from [x0, #4].
define <4 x float> @insertf32_last(ptr %p) {
; CHECK-LABEL: insertf32_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 16
  %l1 = load <4 x float>, ptr %p
  %l2 = load float, ptr %q
  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  %ins = insertelement <4 x float> %s, float %l2, i32 3
  ret <4 x float> %ins
}

; Front-insert pattern with two 64-bit lanes: <2 x i64> is loaded from %p+8,
; its lane 0 is moved to lane 1, and the i64 loaded from %p is inserted into
; lane 0. The expected output is a single 16-byte load from [x0].
define <2 x i64> @inserti64_first(ptr %p) {
; CHECK-LABEL: inserti64_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <2 x i64>, ptr %q
  %l2 = load i64, ptr %p
  %s = shufflevector <2 x i64> %l1, <2 x i64> undef, <2 x i32> <i32 undef, i32 0>
  %ins = insertelement <2 x i64> %s, i64 %l2, i32 0
  ret <2 x i64> %ins
}

; Back-insert pattern with two 64-bit lanes: <2 x i64> is loaded from %p, its
; lane 1 is moved to lane 0, and the i64 loaded from %p+16 is inserted into
; lane 1. The expected output is a single unscaled 16-byte load from
; [x0, #8].
define <2 x i64> @inserti64_last(ptr %p) {
; CHECK-LABEL: inserti64_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #8]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 16
  %l1 = load <2 x i64>, ptr %p
  %l2 = load i64, ptr %q
  %s = shufflevector <2 x i64> %l1, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %ins = insertelement <2 x i64> %s, i64 %l2, i32 1
  ret <2 x i64> %ins
}

; Front-insert pattern whose shuffle mask has an extra undef entry (lane 3)
; in addition to the undef in lane 0 that receives the inserted byte. All
; defined mask entries still follow the shift-up-by-one sequence, and the
; expected output is still a single 8-byte load from [x0].
define <8 x i8> @inserti8_first_undef(ptr %p) {
; CHECK-LABEL: inserti8_first_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  ; Lane 3 is undef where a full shift-up-by-one mask would have index 2.
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 undef, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Back-insert pattern whose shuffle mask defines only lane 0 (index 1); every
; other lane, including lane 7 that receives the inserted byte, is undef. The
; expected output is still a single unscaled 8-byte load from [x0, #1].
define <8 x i8> @inserti8_last_undef(ptr %p) {
; CHECK-LABEL: inserti8_last_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}



; Negative case: same shape as the front-insert extension tests, but the
; vector is zero-extended while the scalar is sign-extended. The expected
; output keeps the two loads separate (vector load from [x0, #1], signed byte
; load from [x0]) and builds the result with ushll, ext and a lane move.
define <8 x i16> @wrong_zextandsext(ptr %p) {
; CHECK-LABEL: wrong_zextandsext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ldrsb w8, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT:    mov v0.h[0], w8
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  ; Extension kinds differ: zext on the vector, sext on the scalar below.
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Negative case: the shuffle shifts the vector up by one lane (freeing lane
; 0), but the scalar loaded from %p is inserted into lane 7 instead of lane 0.
; The expected output keeps the vector load from [x0, #1], an ext, and a
; separate single-lane load into lane 7.
define <8 x i8> @wrongidx_first(ptr %p) {
; CHECK-LABEL: wrongidx_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[7], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  ; Insert index 7 does not match the undef lane (0) of the shuffle mask.
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}

; Negative case: the shuffle shifts the vector down by one lane (freeing lane
; 7), but the scalar loaded from %p+8 is inserted into lane 0 instead of lane
; 7. The expected output keeps the vector load from [x0], an ext, and a
; separate single-lane load into lane 0 from x0+8.
define <8 x i8> @wrong_last(ptr %p) {
; CHECK-LABEL: wrong_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  ; Insert index 0 does not match the undef lane (7) of the shuffle mask.
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Negative case: the shuffle mask is not a pure shift-up-by-one; lane 3 uses
; index 3 where the shift pattern would use index 2. The expected output
; keeps the vector load from [x0, #1], performs the shuffle with a tbl using
; a constant-pool mask, and loads the scalar separately into lane 0.
define <8 x i8> @wrong_shuffle(ptr %p) {
; CHECK-LABEL: wrong_shuffle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    adrp x8, .LCPI19_0
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  ; Mask entry for lane 3 is 3 (duplicated), breaking the consecutive sequence.
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Negative case: the vector lanes are i8 values sign-extended to i16, but the
; inserted scalar is loaded directly as an i16 from %p with no extension. The
; expected output keeps the vector load from [x0, #1], the sshll, an ext, and
; a separate 16-bit single-lane load into lane 0.
define <8 x i16> @wrong_exttype(ptr %p) {
; CHECK-LABEL: wrong_exttype:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  ; Scalar is a 2-byte load, unlike the 1-byte elements of %l1.
  %l2 = load i16, ptr %p
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %l2, i32 0
  ret <8 x i16> %ins
}

; Negative case: both operands are sign-extended to i32, but from different
; source widths: the vector from <4 x i8> and the scalar from i16. The
; expected output keeps a 4-byte vector load from [x0, #1], a signed halfword
; load from [x0], two sshll steps, an ext, and a lane move into lane 0.
define <4 x i32> @wrong_exttype2(ptr %p) {
; CHECK-LABEL: wrong_exttype2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ldrsh w8, [x0]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #12
; CHECK-NEXT:    mov v0.s[0], w8
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <4 x i8>, ptr %q
  %s1 = sext <4 x i8> %l1 to <4 x i32>
  ; Scalar source type is i16, not the i8 element type of %l1.
  %l2 = load i16, ptr %p
  %s2 = sext i16 %l2 to i32
  %s = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  %ins = insertelement <4 x i32> %s, i32 %s2, i32 0
  ret <4 x i32> %ins
}

; Negative case: front-insert shape, but the vector is loaded from %p-1
; rather than %p+1, so the scalar at %p is not the byte immediately before
; the vector's first element. The expected output keeps the vector load from
; [x0, #-1], an ext, and a separate single-lane load into lane 0.
define <8 x i8> @wrong_offsetfirst(ptr %p) {
; CHECK-LABEL: wrong_offsetfirst:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #-1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  ; Offset is -1 here, where the matching front-insert tests use +1.
  %q = getelementptr inbounds i8, ptr %p, i32 -1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Negative case: back-insert shape, but the scalar is loaded from %p+7
; rather than %p+8, so it is not the byte immediately after the 8-byte vector
; loaded from %p. The expected output keeps the vector load from [x0], an
; ext, and a separate single-lane load into lane 7 from x0+7.
define <8 x i8> @wrong_offsetlast(ptr %p) {
; CHECK-LABEL: wrong_offsetlast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #7
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT:    ld1 { v0.b }[7], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  ; Offset is 7 here, where the matching back-insert tests use 8.
  %q = getelementptr inbounds i8, ptr %p, i32 7
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}


; Front-insert pattern with a byte store through a second pointer %r placed
; between the vector load and the scalar load. The expected output keeps the
; two loads separate, with the store emitted between them.
; NOTE(review): presumably the loads are not merged because the store to %r
; may alias %p (no noalias information is given) - confirm against the
; backend combine.
define <8 x i8> @storebetween(ptr %p, ptr %r) {
; CHECK-LABEL: storebetween:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  ; Store sits between the two loads that would otherwise form the pattern.
  store i8 0, ptr %r
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Front-insert pattern with a byte store through a second pointer %r placed
; before both loads. The expected output is the store followed by a single
; 8-byte load from [x0].
define <8 x i8> @storebefore(ptr %p, ptr %r) {
; CHECK-LABEL: storebefore:
; CHECK:       // %bb.0:
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  ; Store precedes both loads, so nothing separates the two loads.
  store i8 0, ptr %r
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Front-insert pattern with a byte store through a second pointer %r placed
; after both loads. The expected output is a single 8-byte load from [x0]
; followed by the store.
define <8 x i8> @storeafter(ptr %p, ptr %r) {
; CHECK-LABEL: storeafter:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  ; Store follows both loads, so nothing separates the two loads.
  store i8 0, ptr %r
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}