; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; Lane-0 merge for <8 x i8>: the vector is read from %p+1, moved up one lane,
; and the byte at %p fills lane 0. Expected codegen is one 8-byte load from %p.
define <8 x i8> @inserti8_first(ptr %p) {
; CHECK-LABEL: inserti8_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Last-lane merge for <8 x i8>: the vector is read from %p, moved down one
; lane, and the byte at %p+8 fills lane 7. Expected codegen is one 8-byte load
; from %p+1.
define <8 x i8> @inserti8_last(ptr %p) {
; CHECK-LABEL: inserti8_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <8 x i8>, ptr %p
  %elt = load i8, ptr %elt.addr
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 7
  ret <8 x i8> %merged
}
; Lane-0 merge where both the vector and the scalar are sign-extended from i8
; to i16 before the shuffle/insert. Expected codegen is one 8-byte load from %p
; followed by a single sshll.
define <8 x i16> @inserti8_first_sext(ptr %p) {
; CHECK-LABEL: inserti8_first_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %vec.ext = sext <8 x i8> %vec to <8 x i16>
  %elt = load i8, ptr %p
  %elt.ext = sext i8 %elt to i16
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i16> %shifted, i16 %elt.ext, i32 0
  ret <8 x i16> %merged
}
; Last-lane merge where both the vector and the scalar are sign-extended from
; i8 to i16 before the shuffle/insert. Expected codegen is one 8-byte load from
; %p+1 followed by a single sshll.
define <8 x i16> @inserti8_last_sext(ptr %p) {
; CHECK-LABEL: inserti8_last_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <8 x i8>, ptr %p
  %vec.ext = sext <8 x i8> %vec to <8 x i16>
  %elt = load i8, ptr %elt.addr
  %elt.ext = sext i8 %elt to i16
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i16> %shifted, i16 %elt.ext, i32 7
  ret <8 x i16> %merged
}
; Lane-0 merge where both the vector and the scalar are zero-extended from i8
; to i16 before the shuffle/insert. Expected codegen is one 8-byte load from %p
; followed by a single ushll.
define <8 x i16> @inserti8_first_zext(ptr %p) {
; CHECK-LABEL: inserti8_first_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %vec.ext = zext <8 x i8> %vec to <8 x i16>
  %elt = load i8, ptr %p
  %elt.ext = zext i8 %elt to i16
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i16> %shifted, i16 %elt.ext, i32 0
  ret <8 x i16> %merged
}
; Last-lane merge where both the vector and the scalar are zero-extended from
; i8 to i16 before the shuffle/insert. Expected codegen is one 8-byte load from
; %p+1 followed by a single ushll.
define <8 x i16> @inserti8_last_zext(ptr %p) {
; CHECK-LABEL: inserti8_last_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <8 x i8>, ptr %p
  %vec.ext = zext <8 x i8> %vec to <8 x i16>
  %elt = load i8, ptr %elt.addr
  %elt.ext = zext i8 %elt to i16
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i16> %shifted, i16 %elt.ext, i32 7
  ret <8 x i16> %merged
}
; Lane-0 merge for <8 x i32> (a 256-bit result): the vector is read from %p+4
; and the i32 at %p fills lane 0. Expected codegen is a single paired load of
; two q registers from %p.
define <8 x i32> @inserti32_first(ptr %p) {
; CHECK-LABEL: inserti32_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 4
  %vec = load <8 x i32>, ptr %vec.addr
  %elt = load i32, ptr %p
  %shifted = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i32> %shifted, i32 %elt, i32 0
  ret <8 x i32> %merged
}
; Last-lane merge for <8 x i32>: the vector is read from %p and the i32 at
; %p+32 fills lane 7. Expected codegen is two 16-byte loads, from %p+4 and
; %p+20.
define <8 x i32> @inserti32_last(ptr %p) {
; CHECK-LABEL: inserti32_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur q0, [x0, #4]
; CHECK-NEXT: ldur q1, [x0, #20]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 32
  %vec = load <8 x i32>, ptr %p
  %elt = load i32, ptr %elt.addr
  %shifted = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i32> %shifted, i32 %elt, i32 7
  ret <8 x i32> %merged
}
; Lane-0 merge for <8 x i32> where the original vector load has a second use
; (the add). Expected codegen loads the merged value from %p with a paired
; load, loads the original vector from %p+4 / %p+20, and adds the two.
define <8 x i32> @inserti32_first_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_first_multiuse:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: ldur q1, [x0, #20]
; CHECK-NEXT: ldur q0, [x0, #4]
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 4
  %vec = load <8 x i32>, ptr %vec.addr
  %elt = load i32, ptr %p
  %shifted = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i32> %shifted, i32 %elt, i32 0
  %sum = add <8 x i32> %vec, %merged
  ret <8 x i32> %sum
}
; Last-lane merge for <8 x i32> where the original vector load has a second
; use (the add). Expected codegen loads the original vector from %p with a
; paired load, loads the merged value from %p+4 / %p+20, and adds the two.
define <8 x i32> @inserti32_last_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_last_multiuse:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ldur q2, [x0, #20]
; CHECK-NEXT: ldur q3, [x0, #4]
; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 32
  %vec = load <8 x i32>, ptr %p
  %elt = load i32, ptr %elt.addr
  %shifted = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i32> %shifted, i32 %elt, i32 7
  %sum = add <8 x i32> %vec, %merged
  ret <8 x i32> %sum
}
; Lane-0 merge for <4 x float>: the vector is read from %p+4 and the float at
; %p fills lane 0. Expected codegen is one 16-byte load from %p.
define <4 x float> @insertf32_first(ptr %p) {
; CHECK-LABEL: insertf32_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 4
  %vec = load <4 x float>, ptr %vec.addr
  %elt = load float, ptr %p
  %shifted = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  %merged = insertelement <4 x float> %shifted, float %elt, i32 0
  ret <4 x float> %merged
}
; Last-lane merge for <4 x float>: the vector is read from %p and the float at
; %p+16 fills lane 3. Expected codegen is one 16-byte load from %p+4.
define <4 x float> @insertf32_last(ptr %p) {
; CHECK-LABEL: insertf32_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur q0, [x0, #4]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 16
  %vec = load <4 x float>, ptr %p
  %elt = load float, ptr %elt.addr
  %shifted = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  %merged = insertelement <4 x float> %shifted, float %elt, i32 3
  ret <4 x float> %merged
}
; Lane-0 merge for <2 x i64>: the vector is read from %p+8 and the i64 at %p
; fills lane 0. Expected codegen is one 16-byte load from %p.
define <2 x i64> @inserti64_first(ptr %p) {
; CHECK-LABEL: inserti64_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <2 x i64>, ptr %vec.addr
  %elt = load i64, ptr %p
  %shifted = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 undef, i32 0>
  %merged = insertelement <2 x i64> %shifted, i64 %elt, i32 0
  ret <2 x i64> %merged
}
; Last-lane merge for <2 x i64>: the vector is read from %p and the i64 at
; %p+16 fills lane 1. Expected codegen is one 16-byte load from %p+8.
define <2 x i64> @inserti64_last(ptr %p) {
; CHECK-LABEL: inserti64_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur q0, [x0, #8]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 16
  %vec = load <2 x i64>, ptr %p
  %elt = load i64, ptr %elt.addr
  %shifted = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %merged = insertelement <2 x i64> %shifted, i64 %elt, i32 1
  ret <2 x i64> %merged
}
; Same shape as the lane-0 <8 x i8> merge, except the shuffle mask has an extra
; undef entry at result lane 3. Expected codegen is still one 8-byte load
; from %p.
define <8 x i8> @inserti8_first_undef(ptr %p) {
; CHECK-LABEL: inserti8_first_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 undef, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Same shape as the last-lane <8 x i8> merge, except only result lane 0 of the
; shuffle mask is defined (source index 1); every other entry is undef.
; Expected codegen is still one 8-byte load from %p+1.
define <8 x i8> @inserti8_last_undef(ptr %p) {
; CHECK-LABEL: inserti8_last_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <8 x i8>, ptr %p
  %elt = load i8, ptr %elt.addr
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 7
  ret <8 x i8> %merged
}
; Negative case: the vector is zero-extended but the scalar is sign-extended.
; Expected codegen keeps two separate loads (vector from %p+1, signed byte
; from %p), the ushll, the ext, and a lane-0 move.
define <8 x i16> @wrong_zextandsext(ptr %p) {
; CHECK-LABEL: wrong_zextandsext:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ldrsb w8, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT: mov v0.h[0], w8
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %vec.ext = zext <8 x i8> %vec to <8 x i16>
  %elt = load i8, ptr %p
  %elt.ext = sext i8 %elt to i16
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i16> %shifted, i16 %elt.ext, i32 0
  ret <8 x i16> %merged
}
; Negative case: the shuffle moves the vector up one lane (leaving lane 0
; undefined) but the scalar is inserted into lane 7 instead of lane 0.
; Expected codegen keeps the vector load, the ext, and a separate lane load.
define <8 x i8> @wrongidx_first(ptr %p) {
; CHECK-LABEL: wrongidx_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT: ld1 { v0.b }[7], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 7
  ret <8 x i8> %merged
}
; Negative case: the shuffle moves the vector down one lane (leaving lane 7
; undefined) but the scalar from %p+8 is inserted into lane 0 instead of
; lane 7. Expected codegen keeps the vector load, the ext, and a separate
; lane load.
define <8 x i8> @wrong_last(ptr %p) {
; CHECK-LABEL: wrong_last:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT: ld1 { v0.b }[0], [x8]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 8
  %vec = load <8 x i8>, ptr %p
  %elt = load i8, ptr %elt.addr
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Negative case: the shuffle mask is not a plain one-lane shift; result lane 3
; selects source index 3 where a shift would select index 2. Expected codegen
; uses a constant-pool mask with tbl and a separate lane load.
define <8 x i8> @wrong_shuffle(ptr %p) {
; CHECK-LABEL: wrong_shuffle:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: adrp x8, .LCPI19_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT: mov v0.d[1], v0.d[0]
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shuffled = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shuffled, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Negative case: the vector lanes are i8 values sign-extended to i16, but the
; scalar is loaded directly as an i16 with no extension. Expected codegen keeps
; the vector load, sshll, ext, and a separate halfword lane load.
define <8 x i16> @wrong_exttype(ptr %p) {
; CHECK-LABEL: wrong_exttype:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT: ld1 { v0.h }[0], [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %vec.ext = sext <8 x i8> %vec to <8 x i16>
  %elt = load i16, ptr %p
  %shifted = shufflevector <8 x i16> %vec.ext, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i16> %shifted, i16 %elt, i32 0
  ret <8 x i16> %merged
}
; Negative case: the vector lanes are sign-extended from i8 to i32 while the
; scalar is sign-extended from i16 to i32, so the source widths differ.
; Expected codegen keeps two separate loads, both sshll steps, the ext, and a
; lane-0 move.
define <4 x i32> @wrong_exttype2(ptr %p) {
; CHECK-LABEL: wrong_exttype2:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur s0, [x0, #1]
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #12
; CHECK-NEXT: mov v0.s[0], w8
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <4 x i8>, ptr %vec.addr
  %vec.ext = sext <4 x i8> %vec to <4 x i32>
  %elt = load i16, ptr %p
  %elt.ext = sext i16 %elt to i32
  %shifted = shufflevector <4 x i32> %vec.ext, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  %merged = insertelement <4 x i32> %shifted, i32 %elt.ext, i32 0
  ret <4 x i32> %merged
}
; Negative case: the vector is read from %p-1 rather than %p+1, so the scalar
; at %p is not the byte immediately before the vector. Expected codegen keeps
; the vector load, the ext, and a separate lane load.
define <8 x i8> @wrong_offsetfirst(ptr %p) {
; CHECK-LABEL: wrong_offsetfirst:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #-1]
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 -1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Negative case: the scalar is read from %p+7 rather than %p+8, so it is not
; the byte immediately after the 8-byte vector at %p. Expected codegen keeps
; the vector load, the ext, and a separate lane load.
define <8 x i8> @wrong_offsetlast(ptr %p) {
; CHECK-LABEL: wrong_offsetlast:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: add x8, x0, #7
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT: ld1 { v0.b }[7], [x8]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %elt.addr = getelementptr inbounds i8, ptr %p, i32 7
  %vec = load <8 x i8>, ptr %p
  %elt = load i8, ptr %elt.addr
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 7
  ret <8 x i8> %merged
}
; Lane-0 merge shape with a store through a second pointer %r placed between
; the vector load and the scalar load. Expected codegen does not merge the
; loads: the vector load, the store, the ext and a lane load all remain.
define <8 x i8> @storebetween(ptr %p, ptr %r) {
; CHECK-LABEL: storebetween:
; CHECK: // %bb.0:
; CHECK-NEXT: ldur d0, [x0, #1]
; CHECK-NEXT: strb wzr, [x1]
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  ; The store sits between the two loads; its position is the point of the test.
  store i8 0, ptr %r
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Lane-0 merge shape with a store through a second pointer %r placed before
; both loads. Expected codegen is the store followed by one 8-byte load
; from %p.
define <8 x i8> @storebefore(ptr %p, ptr %r) {
; CHECK-LABEL: storebefore:
; CHECK: // %bb.0:
; CHECK-NEXT: strb wzr, [x1]
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  ; The store precedes both loads; its position is the point of the test.
  store i8 0, ptr %r
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}
; Lane-0 merge shape with a store through a second pointer %r placed after
; both loads. Expected codegen is one 8-byte load from %p followed by the
; store.
define <8 x i8> @storeafter(ptr %p, ptr %r) {
; CHECK-LABEL: storeafter:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: strb wzr, [x1]
; CHECK-NEXT: ret
  %vec.addr = getelementptr inbounds i8, ptr %p, i32 1
  %vec = load <8 x i8>, ptr %vec.addr
  %elt = load i8, ptr %p
  ; The store follows both loads; its position is the point of the test.
  store i8 0, ptr %r
  %shifted = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %merged = insertelement <8 x i8> %shifted, i8 %elt, i32 0
  ret <8 x i8> %merged
}