; llvm/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -global-isel=1 -mtriple=arm64-apple-ios7.0 -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

@ptr = global ptr null

; Pre-indexed load of <8 x i8>: the load address is %addr advanced by five
; vectors (5 * 8 = 40 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr d0, [x0, #40]!`.
define <8 x i8> @test_v8i8_pre_load(ptr %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0, #40]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
  %val = load <8 x i8>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <8 x i8> %val
}

; Post-indexed load of <8 x i8>: the load uses the original %addr, while
; %addr + 5 vectors (40 bytes) is stored to @ptr, so the expected code is
; `ldr d0, [x0], #40`.
define <8 x i8> @test_v8i8_post_load(ptr %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0], #40
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
  %val = load <8 x i8>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <8 x i8> %val
}

; Pre-indexed store of <8 x i8>: %in is stored at %addr + 5 vectors (40 bytes)
; and that address is also written to @ptr; expected `str d0, [x0, #40]!`.
define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0, #40]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
  store <8 x i8> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <8 x i8>: %in is stored at the original %addr and
; %addr + 5 vectors (40 bytes) is written to @ptr; expected `str d0, [x0], #40`.
define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0], #40
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
  store <8 x i8> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <4 x i16>: the load address is %addr advanced by five
; vectors (5 * 8 = 40 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr d0, [x0, #40]!`.
define <4 x i16> @test_v4i16_pre_load(ptr %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0, #40]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
  %val = load <4 x i16>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x i16> %val
}

; Post-indexed load of <4 x i16>: the load uses the original %addr, while
; %addr + 5 vectors (40 bytes) is stored to @ptr, so the expected code is
; `ldr d0, [x0], #40`.
define <4 x i16> @test_v4i16_post_load(ptr %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0], #40
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
  %val = load <4 x i16>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x i16> %val
}

; Pre-indexed store of <4 x i16>: %in is stored at %addr + 5 vectors (40 bytes)
; and that address is also written to @ptr; expected `str d0, [x0, #40]!`.
define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0, #40]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
  store <4 x i16> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <4 x i16>: %in is stored at the original %addr and
; %addr + 5 vectors (40 bytes) is written to @ptr; expected `str d0, [x0], #40`.
define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0], #40
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
  store <4 x i16> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <2 x i32>: the load address is %addr advanced by five
; vectors (5 * 8 = 40 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr d0, [x0, #40]!`.
define <2 x i32> @test_v2i32_pre_load(ptr %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0, #40]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
  %val = load <2 x i32>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x i32> %val
}

; Post-indexed load of <2 x i32>: the load uses the original %addr, while
; %addr + 5 vectors (40 bytes) is stored to @ptr, so the expected code is
; `ldr d0, [x0], #40`.
define <2 x i32> @test_v2i32_post_load(ptr %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0], #40
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
  %val = load <2 x i32>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x i32> %val
}

; Pre-indexed store of <2 x i32>: %in is stored at %addr + 5 vectors (40 bytes)
; and that address is also written to @ptr; expected `str d0, [x0, #40]!`.
define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0, #40]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
  store <2 x i32> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <2 x i32>: %in is stored at the original %addr and
; %addr + 5 vectors (40 bytes) is written to @ptr; expected `str d0, [x0], #40`.
define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0], #40
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
  store <2 x i32> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <2 x float>: the load address is %addr advanced by five
; vectors (5 * 8 = 40 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr d0, [x0, #40]!`.
define <2 x float> @test_v2f32_pre_load(ptr %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0, #40]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
  %val = load <2 x float>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x float> %val
}

; Post-indexed load of <2 x float>: the load uses the original %addr, while
; %addr + 5 vectors (40 bytes) is stored to @ptr, so the expected code is
; `ldr d0, [x0], #40`.
define <2 x float> @test_v2f32_post_load(ptr %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0], #40
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
  %val = load <2 x float>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x float> %val
}

; Pre-indexed store of <2 x float>: %in is stored at %addr + 5 vectors
; (40 bytes) and that address is also written to @ptr; expected
; `str d0, [x0, #40]!`.
define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0, #40]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
  store <2 x float> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <2 x float>: %in is stored at the original %addr and
; %addr + 5 vectors (40 bytes) is written to @ptr; expected
; `str d0, [x0], #40`.
define void @test_v2f32_post_store(<2 x float> %in, ptr %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0], #40
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
  store <2 x float> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <1 x i64>: the load address is %addr + 5 vectors
; (40 bytes) and is also stored to @ptr. The two llc configurations differ:
; the default run expects `ldr d0, [x0, #40]!`, while the -global-isel=1 run
; expects the value loaded into x8 with writeback and then moved to d0 (fmov).
define <1 x i64> @test_v1i64_pre_load(ptr %addr) {
; CHECK-SD-LABEL: test_v1i64_pre_load:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr d0, [x0, #40]!
; CHECK-SD-NEXT:    adrp x8, _ptr@PAGE
; CHECK-SD-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_pre_load:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr x8, [x0, #40]!
; CHECK-GI-NEXT:    adrp x9, _ptr@PAGE
; CHECK-GI-NEXT:    str x0, [x9, _ptr@PAGEOFF]
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    ret
  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
  %val = load <1 x i64>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <1 x i64> %val
}

; Post-indexed load of <1 x i64>: the load uses the original %addr and
; %addr + 5 vectors (40 bytes) is stored to @ptr. The default run expects
; `ldr d0, [x0], #40`; the -global-isel=1 run expects a post-indexed load into
; x8 followed by an fmov into d0.
define <1 x i64> @test_v1i64_post_load(ptr %addr) {
; CHECK-SD-LABEL: test_v1i64_post_load:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr d0, [x0], #40
; CHECK-SD-NEXT:    adrp x8, _ptr@PAGE
; CHECK-SD-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_load:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr x8, [x0], #40
; CHECK-GI-NEXT:    adrp x9, _ptr@PAGE
; CHECK-GI-NEXT:    str x0, [x9, _ptr@PAGEOFF]
; CHECK-GI-NEXT:    fmov d0, x8
; CHECK-GI-NEXT:    ret
  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
  %val = load <1 x i64>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <1 x i64> %val
}

; Pre-indexed store of <1 x i64>: %in is stored at %addr + 5 vectors (40 bytes)
; and that address is also written to @ptr; both llc configurations expect
; `str d0, [x0, #40]!`.
define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0, #40]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
  store <1 x i64> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <1 x i64>: %in is stored at the original %addr and
; %addr + 5 vectors (40 bytes) is written to @ptr; both llc configurations
; expect `str d0, [x0], #40`.
define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str d0, [x0], #40
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
  store <1 x i64> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <16 x i8>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <16 x i8> @test_v16i8_pre_load(ptr %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
  %val = load <16 x i8>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <16 x i8> %val
}

; Post-indexed load of <16 x i8>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <16 x i8> @test_v16i8_post_load(ptr %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
  %val = load <16 x i8>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <16 x i8> %val
}

; Pre-indexed store of <16 x i8>: %in is stored at %addr + 5 vectors (80 bytes)
; and that address is also written to @ptr; expected `str q0, [x0, #80]!`.
define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
  store <16 x i8> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <16 x i8>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected `str q0, [x0], #80`.
define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
  store <16 x i8> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <8 x i16>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <8 x i16> @test_v8i16_pre_load(ptr %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
  %val = load <8 x i16>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <8 x i16> %val
}

; Post-indexed load of <8 x i16>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <8 x i16> @test_v8i16_post_load(ptr %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
  %val = load <8 x i16>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <8 x i16> %val
}

; Pre-indexed store of <8 x i16>: %in is stored at %addr + 5 vectors (80 bytes)
; and that address is also written to @ptr; expected `str q0, [x0, #80]!`.
define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
  store <8 x i16> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <8 x i16>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected `str q0, [x0], #80`.
define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
  store <8 x i16> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Pre-indexed load of <4 x i32>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <4 x i32> @test_v4i32_pre_load(ptr %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
  %val = load <4 x i32>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x i32> %val
}

; Post-indexed load of <4 x i32>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <4 x i32> @test_v4i32_post_load(ptr %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
  %val = load <4 x i32>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x i32> %val
}

; Pre-indexed store of <4 x i32>: %in is stored at %addr + 5 vectors (80 bytes)
; and that address is also written to @ptr; expected `str q0, [x0, #80]!`.
define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
  store <4 x i32> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <4 x i32>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected `str q0, [x0], #80`.
define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
  store <4 x i32> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}


; Pre-indexed load of <4 x float>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <4 x float> @test_v4f32_pre_load(ptr %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
  %val = load <4 x float>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x float> %val
}

; Post-indexed load of <4 x float>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <4 x float> @test_v4f32_post_load(ptr %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
  %val = load <4 x float>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <4 x float> %val
}

; Pre-indexed store of <4 x float>: %in is stored at %addr + 5 vectors
; (80 bytes) and that address is also written to @ptr; expected
; `str q0, [x0, #80]!`.
define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
  store <4 x float> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <4 x float>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected
; `str q0, [x0], #80`.
define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
  store <4 x float> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}


; Pre-indexed load of <2 x i64>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <2 x i64> @test_v2i64_pre_load(ptr %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
  %val = load <2 x i64>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x i64> %val
}

; Post-indexed load of <2 x i64>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <2 x i64> @test_v2i64_post_load(ptr %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
  %val = load <2 x i64>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x i64> %val
}

; Pre-indexed store of <2 x i64>: %in is stored at %addr + 5 vectors (80 bytes)
; and that address is also written to @ptr; expected `str q0, [x0, #80]!`.
define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
  store <2 x i64> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <2 x i64>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected `str q0, [x0], #80`.
define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
  store <2 x i64> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}


; Pre-indexed load of <2 x double>: the load address is %addr advanced by five
; vectors (5 * 16 = 80 bytes) and that same address is stored to @ptr, so the
; expected code is a single writeback `ldr q0, [x0, #80]!`.
define <2 x double> @test_v2f64_pre_load(ptr %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0, #80]!
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
  %val = load <2 x double>, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x double> %val
}

; Post-indexed load of <2 x double>: the load uses the original %addr, while
; %addr + 5 vectors (80 bytes) is stored to @ptr, so the expected code is
; `ldr q0, [x0], #80`.
define <2 x double> @test_v2f64_post_load(ptr %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0], #80
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
  %val = load <2 x double>, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret <2 x double> %val
}

; Pre-indexed store of <2 x double>: %in is stored at %addr + 5 vectors
; (80 bytes) and that address is also written to @ptr; expected
; `str q0, [x0, #80]!`.
define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0, #80]!
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
  store <2 x double> %in, ptr %newaddr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Post-indexed store of <2 x double>: %in is stored at the original %addr and
; %addr + 5 vectors (80 bytes) is written to @ptr; expected
; `str q0, [x0], #80`.
define void @test_v2f64_post_store(<2 x double> %in, ptr %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, _ptr@PAGE
; CHECK-NEXT:    str q0, [x0], #80
; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
; CHECK-NEXT:    ret
  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
  store <2 x double> %in, ptr %addr, align 8
  store ptr %newaddr, ptr @ptr
  ret void
}

; Stores lane 3 of %in to %addr and returns %addr advanced by one i8 (1 byte).
; The default run expects a post-indexed `st1.b { v0 }[3], [x0], #1`; the
; -global-isel=1 run expects a lane move followed by a post-indexed `str b0`.
define ptr @test_v16i8_post_imm_st1_lane(<16 x i8> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov b0, v0[3]
; CHECK-GI-NEXT:    str b0, [x0], #1
; CHECK-GI-NEXT:    ret
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, ptr %addr

  %newaddr = getelementptr i8, ptr %addr, i32 1
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by two i8 (2 bytes).
; The default run expects the increment in a register (`mov w8, #2`) feeding a
; register post-indexed st1.b; the -global-isel=1 run expects a lane move and
; `str b0, [x0], #2`.
define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov b0, v0[3]
; CHECK-GI-NEXT:    str b0, [x0], #2
; CHECK-GI-NEXT:    ret
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, ptr %addr

  %newaddr = getelementptr i8, ptr %addr, i32 2
  ret ptr %newaddr
}


; Stores lane 3 of %in to %addr and returns %addr advanced by one i16
; (2 bytes). The default run expects a post-indexed
; `st1.h { v0 }[3], [x0], #2`; the -global-isel=1 run expects a lane move
; followed by a post-indexed `str h0`.
define ptr @test_v8i16_post_imm_st1_lane(<8 x i16> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov h0, v0[3]
; CHECK-GI-NEXT:    str h0, [x0], #2
; CHECK-GI-NEXT:    ret
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, ptr %addr

  %newaddr = getelementptr i16, ptr %addr, i32 1
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by two i16
; (4 bytes). The default run expects the increment in a register
; (`mov w8, #4`) feeding a register post-indexed st1.h; the -global-isel=1 run
; expects a lane move and `str h0, [x0], #4`.
define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov h0, v0[3]
; CHECK-GI-NEXT:    str h0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, ptr %addr

  %newaddr = getelementptr i16, ptr %addr, i32 2
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by one i32
; (4 bytes). The default run expects a post-indexed
; `st1.s { v0 }[3], [x0], #4`; the -global-isel=1 run expects a lane move
; followed by a post-indexed `str s0`.
define ptr @test_v4i32_post_imm_st1_lane(<4 x i32> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov s0, v0[3]
; CHECK-GI-NEXT:    str s0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, ptr %addr

  %newaddr = getelementptr i32, ptr %addr, i32 1
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by two i32
; (8 bytes). The default run expects the increment in a register
; (`mov w8, #8`) feeding a register post-indexed st1.s; the -global-isel=1 run
; expects a lane move and `str s0, [x0], #8`.
define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov s0, v0[3]
; CHECK-GI-NEXT:    str s0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, ptr %addr

  %newaddr = getelementptr i32, ptr %addr, i32 2
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by one float
; (4 bytes). The default run expects a post-indexed
; `st1.s { v0 }[3], [x0], #4`; the -global-isel=1 run expects a lane move
; followed by a post-indexed `str s0`.
define ptr @test_v4f32_post_imm_st1_lane(<4 x float> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov s0, v0[3]
; CHECK-GI-NEXT:    str s0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, ptr %addr

  %newaddr = getelementptr float, ptr %addr, i32 1
  ret ptr %newaddr
}

; Stores lane 3 of %in to %addr and returns %addr advanced by two floats
; (8 bytes). The default run expects the increment in a register
; (`mov w8, #8`) feeding a register post-indexed st1.s; the -global-isel=1 run
; expects a lane move and `str s0, [x0], #8`.
define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov s0, v0[3]
; CHECK-GI-NEXT:    str s0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, ptr %addr

  %newaddr = getelementptr float, ptr %addr, i32 2
  ret ptr %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr advanced by one i64
; (8 bytes). The default run expects a post-indexed
; `st1.d { v0 }[1], [x0], #8`; the -global-isel=1 run expects a lane move
; followed by a post-indexed `str d0`.
define ptr @test_v2i64_post_imm_st1_lane(<2 x i64> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov d0, v0[1]
; CHECK-GI-NEXT:    str d0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, ptr %addr

  %newaddr = getelementptr i64, ptr %addr, i64 1
  ret ptr %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr advanced by two i64
; (16 bytes). The default run expects the increment in a register
; (`mov w8, #16`) feeding a register post-indexed st1.d; the -global-isel=1
; run expects a lane move and `str d0, [x0], #16`.
define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #16 ; =0x10
; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov d0, v0[1]
; CHECK-GI-NEXT:    str d0, [x0], #16
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, ptr %addr

  %newaddr = getelementptr i64, ptr %addr, i64 2
  ret ptr %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr advanced by one double
; (8 bytes). The default run expects a post-indexed
; `st1.d { v0 }[1], [x0], #8`; the -global-isel=1 run expects a lane move
; followed by a post-indexed `str d0`.
define ptr @test_v2f64_post_imm_st1_lane(<2 x double> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov d0, v0[1]
; CHECK-GI-NEXT:    str d0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, ptr %addr

  %newaddr = getelementptr double, ptr %addr, i32 1
  ret ptr %newaddr
}

; Stores lane 1 of %in to %addr and returns %addr advanced by two doubles
; (16 bytes). The default run expects the increment in a register
; (`mov w8, #16`) feeding a register post-indexed st1.d; the -global-isel=1
; run expects a lane move and `str d0, [x0], #16`.
define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #16 ; =0x10
; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov d0, v0[1]
; CHECK-GI-NEXT:    str d0, [x0], #16
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, ptr %addr

  %newaddr = getelementptr double, ptr %addr, i32 2
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 3 of %in to %addr and returns %addr
; advanced by one i8 (1 byte). The expected output of both runs carries a kill
; annotation redefining d0 as q0; the default run then expects a post-indexed
; `st1.b { v0 }[3], [x0], #1`, the -global-isel=1 run a lane move plus a
; post-indexed `str b0`.
define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov b0, v0[3]
; CHECK-GI-NEXT:    str b0, [x0], #1
; CHECK-GI-NEXT:    ret
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, ptr %addr

  %newaddr = getelementptr i8, ptr %addr, i32 1
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 3 of %in to %addr and returns %addr
; advanced by two i8 (2 bytes). The expected output of both runs carries a kill
; annotation redefining d0 as q0; the default run expects the increment in a
; register (`mov w8, #2`) feeding a register post-indexed st1.b, the
; -global-isel=1 run a lane move plus `str b0, [x0], #2`.
define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov b0, v0[3]
; CHECK-GI-NEXT:    str b0, [x0], #2
; CHECK-GI-NEXT:    ret
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, ptr %addr

  %newaddr = getelementptr i8, ptr %addr, i32 2
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 3 of %in to %addr and returns %addr
; advanced by one i16 (2 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run then expects a
; post-indexed `st1.h { v0 }[3], [x0], #2`, the -global-isel=1 run a lane move
; plus a post-indexed `str h0`.
define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov h0, v0[3]
; CHECK-GI-NEXT:    str h0, [x0], #2
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, ptr %addr

  %newaddr = getelementptr i16, ptr %addr, i32 1
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 3 of %in to %addr and returns %addr
; advanced by two i16 (4 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run expects the increment in
; a register (`mov w8, #4`) feeding a register post-indexed st1.h, the
; -global-isel=1 run a lane move plus `str h0, [x0], #4`.
define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov h0, v0[3]
; CHECK-GI-NEXT:    str h0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, ptr %addr

  %newaddr = getelementptr i16, ptr %addr, i32 2
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 1 of %in to %addr and returns %addr
; advanced by one i32 (4 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run then expects a
; post-indexed `st1.s { v0 }[1], [x0], #4`, the -global-isel=1 run a lane move
; plus a post-indexed `str s0`.
define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s0, v0[1]
; CHECK-GI-NEXT:    str s0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, ptr %addr

  %newaddr = getelementptr i32, ptr %addr, i32 1
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 1 of %in to %addr and returns %addr
; advanced by two i32 (8 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run expects the increment in
; a register (`mov w8, #8`) feeding a register post-indexed st1.s, the
; -global-isel=1 run a lane move plus `str s0, [x0], #8`.
define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s0, v0[1]
; CHECK-GI-NEXT:    str s0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, ptr %addr

  %newaddr = getelementptr i32, ptr %addr, i32 2
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 1 of %in to %addr and returns %addr
; advanced by one float (4 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run then expects a
; post-indexed `st1.s { v0 }[1], [x0], #4`, the -global-isel=1 run a lane move
; plus a post-indexed `str s0`.
define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s0, v0[1]
; CHECK-GI-NEXT:    str s0, [x0], #4
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, ptr %addr

  %newaddr = getelementptr float, ptr %addr, i32 1
  ret ptr %newaddr
}

; 64-bit vector variant: stores lane 1 of %in to %addr and returns %addr
; advanced by two floats (8 bytes). The expected output of both runs carries a
; kill annotation redefining d0 as q0; the default run expects the increment in
; a register (`mov w8, #8`) feeding a register post-indexed st1.s, the
; -global-isel=1 run a lane move plus `str s0, [x0], #8`.
define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s0, v0[1]
; CHECK-GI-NEXT:    str s0, [x0], #8
; CHECK-GI-NEXT:    ret
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, ptr %addr

  %newaddr = getelementptr float, ptr %addr, i32 2
  ret ptr %newaddr
}

; Two-vector ld2 intrinsic load from %A, with %A + 32 bytes stored to %ptr.
; The default run expects the increment folded into a post-indexed
; `ld2.16b { v0, v1 }, [x0], #32`; the -global-isel=1 run expects a plain ld2
; followed by a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; Two-vector ld2 intrinsic load from %A, with %A + %inc bytes (a runtime value)
; stored to %ptr. The default run expects a register post-indexed
; `ld2.16b { v0, v1 }, [x0], x2`; the -global-isel=1 run expects a plain ld2
; followed by a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr)


; ld2 of two <8 x i8> vectors from %A, then %A + 16 bytes (the combined size of
; both vectors) is stored to %ptr. The SelectionDAG expectations fold the step
; into a post-indexed ld2 ([x0], #16); the GlobalISel expectations keep a
; separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; ld2 of two <8 x i8> vectors from %A, then %A + %inc bytes is stored to %ptr.
; The SelectionDAG expectations use the register post-indexed form
; ([x0], x2); the GlobalISel expectations keep a separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr)


; ld2 of two <8 x i16> vectors from %A, then %A + 16 i16 elements (32 bytes,
; the combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #32); the
; GlobalISel expectations keep a separate add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; ld2 of two <8 x i16> vectors from %A, then %A advanced by %inc i16 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #1) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld2.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr)


; ld2 of two <4 x i16> vectors from %A, then %A + 8 i16 elements (16 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #16); the
; GlobalISel expectations keep a separate add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; ld2 of two <4 x i16> vectors from %A, then %A advanced by %inc i16 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #1) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld2.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr)


; ld2 of two <4 x i32> vectors from %A, then %A + 8 i32 elements (32 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #32); the
; GlobalISel expectations keep a separate add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; ld2 of two <4 x i32> vectors from %A, then %A advanced by %inc i32 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #2) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr)


; ld2 of two <2 x i32> vectors from %A, then %A + 4 i32 elements (16 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #16); the
; GlobalISel expectations keep a separate add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; ld2 of two <2 x i32> vectors from %A, then %A advanced by %inc i32 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #2) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr)


; ld2 of two <2 x i64> vectors from %A, then %A + 4 i64 elements (32 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #32); the
; GlobalISel expectations keep a separate add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; ld2 of two <2 x i64> vectors from %A, then %A advanced by %inc i64 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr)


; ld2 intrinsic on two <1 x i64> vectors from %A, then %A + 2 i64 elements
; (16 bytes, the combined size of both vectors) is stored to %ptr. Both sets of
; expectations show ld1.1d with a two-register list instead of an ld2
; mnemonic; only the SelectionDAG expectations fold the step into the load
; ([x0], #16).
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; ld2 intrinsic on two <1 x i64> vectors from %A, then %A advanced by %inc i64
; elements is stored to %ptr. Both sets of expectations show ld1.1d with a
; two-register list; the SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form, while the GlobalISel
; expectations fold the scaling into a separate add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr)


; ld2 of two <4 x float> vectors from %A, then %A + 8 floats (32 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #32); the
; GlobalISel expectations keep a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; ld2 of two <4 x float> vectors from %A, then %A advanced by %inc floats is
; stored to %ptr. The SelectionDAG expectations scale %inc to bytes (lsl #2)
; and use the register post-indexed form; the GlobalISel expectations fold the
; scaling into a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr)


; ld2 of two <2 x float> vectors from %A, then %A + 4 floats (16 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #16); the
; GlobalISel expectations keep a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

; ld2 of two <2 x float> vectors from %A, then %A advanced by %inc floats is
; stored to %ptr. The SelectionDAG expectations scale %inc to bytes (lsl #2)
; and use the register post-indexed form; the GlobalISel expectations fold the
; scaling into a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr)


; ld2 of two <2 x double> vectors from %A, then %A + 4 doubles (32 bytes, the
; combined size of both vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld2 ([x0], #32); the
; GlobalISel expectations keep a separate add.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

; ld2 of two <2 x double> vectors from %A, then %A advanced by %inc doubles is
; stored to %ptr. The SelectionDAG expectations scale %inc to bytes (lsl #3)
; and use the register post-indexed form; the GlobalISel expectations fold the
; scaling into a separate add.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr)


; ld2 intrinsic on two <1 x double> vectors from %A, then %A + 2 doubles
; (16 bytes, the combined size of both vectors) is stored to %ptr. Both sets of
; expectations show ld1.1d with a two-register list instead of an ld2
; mnemonic; only the SelectionDAG expectations fold the step into the load
; ([x0], #16).
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

; ld2 intrinsic on two <1 x double> vectors from %A, then %A advanced by %inc
; doubles is stored to %ptr. Both sets of expectations show ld1.1d with a
; two-register list; the SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form, while the GlobalISel
; expectations fold the scaling into a separate add.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr)


; ld3 of three <16 x i8> vectors from %A, then %A + 48 bytes (the combined size
; of the three vectors) is stored to %ptr. The SelectionDAG expectations fold
; the step into a post-indexed ld3 ([x0], #48); the GlobalISel expectations
; keep a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 48
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

; ld3 of three <16 x i8> vectors from %A, then %A + %inc bytes is stored to
; %ptr. The SelectionDAG expectations use the register post-indexed form
; ([x0], x2); the GlobalISel expectations keep a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr)


; ld3 of three <8 x i8> vectors from %A, then %A + 24 bytes (the combined size
; of the three vectors) is stored to %ptr. The SelectionDAG expectations fold
; the step into a post-indexed ld3 ([x0], #24); the GlobalISel expectations
; keep a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 24
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

; ld3 of three <8 x i8> vectors from %A, then %A + %inc bytes is stored to
; %ptr. The SelectionDAG expectations use the register post-indexed form
; ([x0], x2); the GlobalISel expectations keep a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr)


; ld3 of three <8 x i16> vectors from %A, then %A + 24 i16 elements (48 bytes,
; the combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #48); the
; GlobalISel expectations keep a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 24
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

; ld3 of three <8 x i16> vectors from %A, then %A advanced by %inc i16 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #1) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld3.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr)


; ld3 of three <4 x i16> vectors from %A, then %A + 12 i16 elements (24 bytes,
; the combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #24); the
; GlobalISel expectations keep a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

; ld3 of three <4 x i16> vectors from %A, then %A advanced by %inc i16 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #1) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld3.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr)


; ld3 of three <4 x i32> vectors from %A, then %A + 12 i32 elements (48 bytes,
; the combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #48); the
; GlobalISel expectations keep a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

; ld3 of three <4 x i32> vectors from %A, then %A advanced by %inc i32 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #2) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr)


; ld3 of three <2 x i32> vectors from %A, then %A + 6 i32 elements (24 bytes,
; the combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #24); the
; GlobalISel expectations keep a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

; ld3 of three <2 x i32> vectors from %A, then %A advanced by %inc i32 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #2) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr)


; ld3 of three <2 x i64> vectors from %A, then %A + 6 i64 elements (48 bytes,
; the combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #48); the
; GlobalISel expectations keep a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

; ld3 of three <2 x i64> vectors from %A, then %A advanced by %inc i64 elements
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr)


; ld3 intrinsic on three <1 x i64> vectors from %A, then %A + 3 i64 elements
; (24 bytes, the combined size of the three vectors) is stored to %ptr. Both
; sets of expectations show ld1.1d with a three-register list instead of an
; ld3 mnemonic; only the SelectionDAG expectations fold the step into the load
; ([x0], #24).
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

; ld3 intrinsic on three <1 x i64> vectors from %A, then %A advanced by %inc
; i64 elements is stored to %ptr. Both sets of expectations show ld1.1d with a
; three-register list; the SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form, while the GlobalISel
; expectations fold the scaling into a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr)


; ld3 of three <4 x float> vectors from %A, then %A + 12 floats (48 bytes, the
; combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #48); the
; GlobalISel expectations keep a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

; ld3 of three <4 x float> vectors from %A, then %A advanced by %inc floats is
; stored to %ptr. The SelectionDAG expectations scale %inc to bytes (lsl #2)
; and use the register post-indexed form; the GlobalISel expectations fold the
; scaling into a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr)


; ld3 of three <2 x float> vectors from %A, then %A + 6 floats (24 bytes, the
; combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #24); the
; GlobalISel expectations keep a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

; ld3 of three <2 x float> vectors from %A, then %A advanced by %inc floats is
; stored to %ptr. The SelectionDAG expectations scale %inc to bytes (lsl #2)
; and use the register post-indexed form; the GlobalISel expectations fold the
; scaling into a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr)


; ld3 of three <2 x double> vectors from %A, then %A + 6 doubles (48 bytes, the
; combined size of the three vectors) is stored to %ptr. The SelectionDAG
; expectations fold the step into a post-indexed ld3 ([x0], #48); the
; GlobalISel expectations keep a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

; ld3 of three <2 x double> vectors from %A, then %A advanced by %inc doubles
; is stored to %ptr. The SelectionDAG expectations scale %inc to bytes
; (lsl #3) and use the register post-indexed form; the GlobalISel expectations
; fold the scaling into a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr)


; Post-increment by immediate: ld3 of three <1 x double> vectors from %A, with
; %A + 3 doubles (24 bytes, the total size loaded) stored to %ptr. Both sets of
; assertions expect an ld1.1d of three registers rather than an ld3; the
; SelectionDAG ones expect a post-index of #24, the GlobalISel ones a separate add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

; Post-increment by register: ld3 of three <1 x double> vectors from %A, with
; %A + %inc doubles stored to %ptr. Both sets of assertions expect an ld1.1d of
; three registers rather than an ld3; the SelectionDAG ones expect the increment
; scaled by lsl #3 and used as post-index register, the GlobalISel ones a
; separate add with a shifted register.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr)


; Post-increment by immediate: ld4 of four <16 x i8> vectors from %A, with
; %A + 64 bytes (the total size loaded) stored to %ptr. The SelectionDAG
; assertions expect the increment folded into the load as a post-index of #64;
; the GlobalISel assertions expect a plain load plus an add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 64
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

; Post-increment by register: ld4 of four <16 x i8> vectors from %A, with
; %A + %inc bytes stored to %ptr. The SelectionDAG assertions expect x2 used
; directly as the post-index register (no scaling for i8); the GlobalISel
; assertions expect a plain load plus an add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr)


; Post-increment by immediate: ld4 of four <8 x i8> vectors from %A, with
; %A + 32 bytes (the total size loaded) stored to %ptr. The SelectionDAG
; assertions expect the increment folded into the load as a post-index of #32;
; the GlobalISel assertions expect a plain load plus an add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

; Post-increment by register: ld4 of four <8 x i8> vectors from %A, with
; %A + %inc bytes stored to %ptr. The SelectionDAG assertions expect x2 used
; directly as the post-index register (no scaling for i8); the GlobalISel
; assertions expect a plain load plus an add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr)


; Post-increment by immediate: ld4 of four <8 x i16> vectors from %A, with
; %A + 32 i16 elements (64 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #64; the GlobalISel assertions expect a plain load plus an add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

; Post-increment by register: ld4 of four <8 x i16> vectors from %A, with
; %A + %inc i16 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #1 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr)


; Post-increment by immediate: ld4 of four <4 x i16> vectors from %A, with
; %A + 16 i16 elements (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

; Post-increment by register: ld4 of four <4 x i16> vectors from %A, with
; %A + %inc i16 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #1 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr)


; Post-increment by immediate: ld4 of four <4 x i32> vectors from %A, with
; %A + 16 i32 elements (64 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #64; the GlobalISel assertions expect a plain load plus an add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

; Post-increment by register: ld4 of four <4 x i32> vectors from %A, with
; %A + %inc i32 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr)


; Post-increment by immediate: ld4 of four <2 x i32> vectors from %A, with
; %A + 8 i32 elements (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

; Post-increment by register: ld4 of four <2 x i32> vectors from %A, with
; %A + %inc i32 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr)


; Post-increment by immediate: ld4 of four <2 x i64> vectors from %A, with
; %A + 8 i64 elements (64 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #64; the GlobalISel assertions expect a plain load plus an add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

; Post-increment by register: ld4 of four <2 x i64> vectors from %A, with
; %A + %inc i64 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #3 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr)


; Post-increment by immediate: ld4 of four <1 x i64> vectors from %A, with
; %A + 4 i64 elements (32 bytes, the total size loaded) stored to %ptr. Both sets
; of assertions expect an ld1.1d of four registers rather than an ld4; the
; SelectionDAG ones expect a post-index of #32, the GlobalISel ones a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

; Post-increment by register: ld4 of four <1 x i64> vectors from %A, with
; %A + %inc i64 elements stored to %ptr. Both sets of assertions expect an
; ld1.1d of four registers rather than an ld4; the SelectionDAG ones expect the
; increment scaled by lsl #3 and used as post-index register, the GlobalISel
; ones a separate add with a shifted register.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr)


; Post-increment by immediate: ld4 of four <4 x float> vectors from %A, with
; %A + 16 floats (64 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #64; the GlobalISel assertions expect a plain load plus an add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

; Post-increment by register: ld4 of four <4 x float> vectors from %A, with
; %A + %inc floats stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr)


; Post-increment by immediate: ld4 of four <2 x float> vectors from %A, with
; %A + 8 floats (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

; Post-increment by register: ld4 of four <2 x float> vectors from %A, with
; %A + %inc floats stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr)


; Post-increment by immediate: ld4 of four <2 x double> vectors from %A, with
; %A + 8 doubles (64 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #64; the GlobalISel assertions expect a plain load plus an add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

; Post-increment by register: ld4 of four <2 x double> vectors from %A, with
; %A + %inc doubles stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #3 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr)


; Post-increment by immediate: ld4 of four <1 x double> vectors from %A, with
; %A + 4 doubles (32 bytes, the total size loaded) stored to %ptr. Both sets of
; assertions expect an ld1.1d of four registers rather than an ld4; the
; SelectionDAG ones expect a post-index of #32, the GlobalISel ones a separate add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

; Post-increment by register: ld4 of four <1 x double> vectors from %A, with
; %A + %inc doubles stored to %ptr. Both sets of assertions expect an ld1.1d of
; four registers rather than an ld4; the SelectionDAG ones expect the increment
; scaled by lsl #3 and used as post-index register, the GlobalISel ones a
; separate add with a shifted register.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr)

; Post-increment by immediate: ld1x2 of two <16 x i8> vectors from %A, with
; %A + 32 bytes (the total size loaded) stored to %ptr. The SelectionDAG
; assertions expect the increment folded into the load as a post-index of #32;
; the GlobalISel assertions expect a plain load plus an add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}

; Post-increment by register: ld1x2 of two <16 x i8> vectors from %A, with
; %A + %inc bytes stored to %ptr. The SelectionDAG assertions expect x2 used
; directly as the post-index register (no scaling for i8); the GlobalISel
; assertions expect a plain load plus an add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr)


; Post-increment by immediate: ld1x2 of two <8 x i8> vectors from %A, with
; %A + 16 bytes (the total size loaded) stored to %ptr. The SelectionDAG
; assertions expect the increment folded into the load as a post-index of #16;
; the GlobalISel assertions expect a plain load plus an add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld1x2
}

; Post-increment by register: ld1x2 of two <8 x i8> vectors from %A, with
; %A + %inc bytes stored to %ptr. The SelectionDAG assertions expect x2 used
; directly as the post-index register (no scaling for i8); the GlobalISel
; assertions expect a plain load plus an add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld1x2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr)


; Post-increment by immediate: ld1x2 of two <8 x i16> vectors from %A, with
; %A + 16 i16 elements (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld1x2
}

; Post-increment by register: ld1x2 of two <8 x i16> vectors from %A, with
; %A + %inc i16 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #1 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld1x2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr)


; Post-increment by immediate: ld1x2 of two <4 x i16> vectors from %A, with
; %A + 8 i16 elements (16 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #16; the GlobalISel assertions expect a plain load plus an add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld1x2
}

; Post-increment by register: ld1x2 of two <4 x i16> vectors from %A, with
; %A + %inc i16 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #1 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld1x2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr)


; Post-increment by immediate: ld1x2 of two <4 x i32> vectors from %A, with
; %A + 8 i32 elements (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld1x2
}

; Post-increment by register: ld1x2 of two <4 x i32> vectors from %A, with
; %A + %inc i32 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld1x2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr)


; Post-increment by immediate: ld1x2 of two <2 x i32> vectors from %A, with
; %A + 4 i32 elements (16 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #16; the GlobalISel assertions expect a plain load plus an add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld1x2
}

; Post-increment by register: ld1x2 of two <2 x i32> vectors from %A, with
; %A + %inc i32 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #2 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld1x2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr)


; Post-increment by immediate: ld1x2 of two <2 x i64> vectors from %A, with
; %A + 4 i64 elements (32 bytes, the total size loaded) stored to %ptr. The
; SelectionDAG assertions expect the increment folded into the load as a
; post-index of #32; the GlobalISel assertions expect a plain load plus an add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld1x2
}

; Post-increment by register: ld1x2 of two <2 x i64> vectors from %A, with
; %A + %inc i64 elements stored to %ptr. The SelectionDAG assertions expect the
; increment scaled by lsl #3 into x8 and used as the post-index register; the
; GlobalISel assertions expect a plain load plus an add with a shifted register.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld1x2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr)


; Post-increment by immediate: loads two <1 x i64> vectors from %A with ld1x2
; and stores %A + 2 x i64 (16 bytes, the amount loaded) to %ptr. SelectionDAG
; is expected to fold the increment into a post-indexed ld1 (#16); the
; GlobalISel assertions show a separate add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld1x2
}

; Post-increment by register: loads two <1 x i64> vectors from %A with ld1x2
; and stores %A + %inc x i64 to %ptr. SelectionDAG is expected to scale %inc
; (lsl #3) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld1x2
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr)


; Post-increment by immediate: loads two <4 x float> vectors from %A with
; ld1x2 and stores %A + 8 x float (32 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#32); the GlobalISel assertions show a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld1x2
}

; Post-increment by register: loads two <4 x float> vectors from %A with
; ld1x2 and stores %A + %inc x float to %ptr. SelectionDAG is expected to
; scale %inc (lsl #2) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld1x2
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr)


; Post-increment by immediate: loads two <2 x float> vectors from %A with
; ld1x2 and stores %A + 4 x float (16 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#16); the GlobalISel assertions show a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld1x2
}

; Post-increment by register: loads two <2 x float> vectors from %A with
; ld1x2 and stores %A + %inc x float to %ptr. SelectionDAG is expected to
; scale %inc (lsl #2) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld1x2
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr)


; Post-increment by immediate: loads two <2 x double> vectors from %A with
; ld1x2 and stores %A + 4 x double (32 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#32); the GlobalISel assertions show a separate add.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld1x2
}

; Post-increment by register: loads two <2 x double> vectors from %A with
; ld1x2 and stores %A + %inc x double to %ptr. SelectionDAG is expected to
; scale %inc (lsl #3) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld1x2
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr)


; Post-increment by immediate: loads two <1 x double> vectors from %A with
; ld1x2 and stores %A + 2 x double (16 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#16); the GlobalISel assertions show a separate add.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld1x2
}

; Post-increment by register: loads two <1 x double> vectors from %A with
; ld1x2 and stores %A + %inc x double to %ptr. SelectionDAG is expected to
; scale %inc (lsl #3) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld1x2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr)


; Post-increment by immediate: loads three <16 x i8> vectors from %A with
; ld1x3 and stores %A + 48 bytes (the amount loaded) to %ptr. SelectionDAG is
; expected to fold the increment into a post-indexed ld1 (#48); the GlobalISel
; assertions show a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 48
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}

; Post-increment by register: loads three <16 x i8> vectors from %A with
; ld1x3 and stores %A + %inc bytes to %ptr. The element type is i8, so the
; assertions show no scaling: SelectionDAG is expected to use %inc directly as
; the post-index register; the GlobalISel assertions show a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr)


; Post-increment by immediate: loads three <8 x i8> vectors from %A with ld1x3
; and stores %A + 24 bytes (the amount loaded) to %ptr. SelectionDAG is
; expected to fold the increment into a post-indexed ld1 (#24); the GlobalISel
; assertions show a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 24
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}

; Post-increment by register: loads three <8 x i8> vectors from %A with ld1x3
; and stores %A + %inc bytes to %ptr. The element type is i8, so the
; assertions show no scaling: SelectionDAG is expected to use %inc directly as
; the post-index register; the GlobalISel assertions show a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr)


; Post-increment by immediate: loads three <8 x i16> vectors from %A with
; ld1x3 and stores %A + 24 x i16 (48 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#48); the GlobalISel assertions show a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 24
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}

; Post-increment by register: loads three <8 x i16> vectors from %A with
; ld1x3 and stores %A + %inc x i16 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #1) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr)


; Post-increment by immediate: loads three <4 x i16> vectors from %A with
; ld1x3 and stores %A + 12 x i16 (24 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#24); the GlobalISel assertions show a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}

; Post-increment by register: loads three <4 x i16> vectors from %A with
; ld1x3 and stores %A + %inc x i16 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #1) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr)


; Post-increment by immediate: loads three <4 x i32> vectors from %A with
; ld1x3 and stores %A + 12 x i32 (48 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#48); the GlobalISel assertions show a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}

; Post-increment by register: loads three <4 x i32> vectors from %A with
; ld1x3 and stores %A + %inc x i32 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #2) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr)


; Post-increment by immediate: loads three <2 x i32> vectors from %A with
; ld1x3 and stores %A + 6 x i32 (24 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#24); the GlobalISel assertions show a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
}

; Post-increment by register: loads three <2 x i32> vectors from %A with
; ld1x3 and stores %A + %inc x i32 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #2) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr)


; Post-increment by immediate: loads three <2 x i64> vectors from %A with
; ld1x3 and stores %A + 6 x i64 (48 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#48); the GlobalISel assertions show a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
}

; Post-increment by register: loads three <2 x i64> vectors from %A with
; ld1x3 and stores %A + %inc x i64 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #3) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr)


; Post-increment by immediate: loads three <1 x i64> vectors from %A with
; ld1x3 and stores %A + 3 x i64 (24 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#24); the GlobalISel assertions show a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
}

; Post-increment by register: loads three <1 x i64> vectors from %A with
; ld1x3 and stores %A + %inc x i64 to %ptr. SelectionDAG is expected to scale
; %inc (lsl #3) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr)


; Post-increment by immediate: loads three <4 x float> vectors from %A with
; ld1x3 and stores %A + 12 x float (48 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#48); the GlobalISel assertions show a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
}

; Post-increment by register: loads three <4 x float> vectors from %A with
; ld1x3 and stores %A + %inc x float to %ptr. SelectionDAG is expected to
; scale %inc (lsl #2) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr)


; Post-increment by immediate: loads three <2 x float> vectors from %A with
; ld1x3 and stores %A + 6 x float (24 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#24); the GlobalISel assertions show a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
}

; Post-increment by register: loads three <2 x float> vectors from %A with
; ld1x3 and stores %A + %inc x float to %ptr. SelectionDAG is expected to
; scale %inc (lsl #2) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr)


; Post-increment by immediate: loads three <2 x double> vectors from %A with
; ld1x3 and stores %A + 6 x double (48 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#48); the GlobalISel assertions show a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
}

; Post-increment by register: loads three <2 x double> vectors from %A with
; ld1x3 and stores %A + %inc x double to %ptr. SelectionDAG is expected to
; scale %inc (lsl #3) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr)


; Post-increment by immediate: loads three <1 x double> vectors from %A with
; ld1x3 and stores %A + 3 x double (24 bytes, the amount loaded) to %ptr.
; SelectionDAG is expected to fold the increment into a post-indexed ld1
; (#24); the GlobalISel assertions show a separate add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
}

; Post-increment by register: loads three <1 x double> vectors from %A with
; ld1x3 and stores %A + %inc x double to %ptr. SelectionDAG is expected to
; scale %inc (lsl #3) and fold it into a register post-indexed ld1; the
; GlobalISel assertions show a separate shifted add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr)


; Post-increment by immediate: loads four <16 x i8> vectors from %A with ld1x4
; and stores %A + 64 bytes (the amount loaded) to %ptr. SelectionDAG is
; expected to fold the increment into a post-indexed ld1 (#64); the GlobalISel
; assertions show a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 64
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
}

; Post-increment by register: loads four <16 x i8> vectors from %A with ld1x4
; and stores %A + %inc bytes to %ptr. The element type is i8, so the
; assertions show no scaling: SelectionDAG is expected to use %inc directly as
; the post-index register; the GlobalISel assertions show a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr)


; Post-increment by immediate: loads four <8 x i8> vectors from %A with ld1x4
; and stores %A + 32 bytes (the amount loaded) to %ptr. SelectionDAG is
; expected to fold the increment into a post-indexed ld1 (#32); the GlobalISel
; assertions show a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
}

; Post-increment by register: loads four <8 x i8> vectors from %A with ld1x4
; and stores %A + %inc bytes to %ptr. The element type is i8, so the
; assertions show no scaling: SelectionDAG is expected to use %inc directly as
; the post-index register; the GlobalISel assertions show a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr)


; Post-increment by immediate: loads four <8 x i16> vectors from %A with ld1x4
; and stores %A + 32 x i16 (64 bytes, the amount loaded) to %ptr. SelectionDAG
; is expected to fold the increment into a post-indexed ld1 (#64); the
; GlobalISel assertions show a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
}

; Post-increment by register: loads four <8 x i16> vectors from %A with ld1x4
; and stores %A + %inc x i16 to %ptr. SelectionDAG is expected to scale %inc
; (lsl #1) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr)


; Post-increment by immediate: loads four <4 x i16> vectors from %A with ld1x4
; and stores %A + 16 x i16 (32 bytes, the amount loaded) to %ptr. SelectionDAG
; is expected to fold the increment into a post-indexed ld1 (#32); the
; GlobalISel assertions show a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
}

; Post-increment by register: loads four <4 x i16> vectors from %A with ld1x4
; and stores %A + %inc x i16 to %ptr. SelectionDAG is expected to scale %inc
; (lsl #1) and fold it into a register post-indexed ld1; the GlobalISel
; assertions show a separate shifted add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <4 x i32> vectors. %A
; is advanced by 16 i32 elements (64 bytes, the total size loaded) and the new
; pointer is stored to %ptr. SelectionDAG is expected to fold the increment
; into the load as a post-index of #64; GlobalISel is expected to keep a
; separate add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <4 x i32> vectors. %A is
; advanced by %inc i32 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <2 x i32> vectors. %A
; is advanced by 8 i32 elements (32 bytes, the total size loaded) and the new
; pointer is stored to %ptr. SelectionDAG is expected to fold the increment
; into the load as a post-index of #32; GlobalISel is expected to keep a
; separate add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <2 x i32> vectors. %A is
; advanced by %inc i32 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <2 x i64> vectors. %A
; is advanced by 8 i64 elements (64 bytes, the total size loaded) and the new
; pointer is stored to %ptr. SelectionDAG is expected to fold the increment
; into the load as a post-index of #64; GlobalISel is expected to keep a
; separate add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <2 x i64> vectors. %A is
; advanced by %inc i64 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <1 x i64> vectors. %A
; is advanced by 4 i64 elements (32 bytes, the total size loaded) and the new
; pointer is stored to %ptr. SelectionDAG is expected to fold the increment
; into the load as a post-index of #32; GlobalISel is expected to keep a
; separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <1 x i64> vectors. %A is
; advanced by %inc i64 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <4 x float> vectors.
; %A is advanced by 16 float elements (64 bytes, the total size loaded) and
; the new pointer is stored to %ptr. SelectionDAG is expected to fold the
; increment into the load as a post-index of #64; GlobalISel is expected to
; keep a separate add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <4 x float> vectors. %A
; is advanced by %inc float elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
}

declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <2 x float> vectors.
; %A is advanced by 8 float elements (32 bytes, the total size loaded) and
; the new pointer is stored to %ptr. SelectionDAG is expected to fold the
; increment into the load as a post-index of #32; GlobalISel is expected to
; keep a separate add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <2 x float> vectors. %A
; is advanced by %inc float elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld1; GlobalISel is expected to keep a separate add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <2 x double> vectors.
; %A is advanced by 8 double elements (64 bytes, the total size loaded) and
; the new pointer is stored to %ptr. SelectionDAG is expected to fold the
; increment into the load as a post-index of #64; GlobalISel is expected to
; keep a separate add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <2 x double> vectors.
; %A is advanced by %inc double elements and the new pointer is stored to
; %ptr. SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it
; into a register post-indexed ld1; GlobalISel is expected to keep a separate
; add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr)


; Immediate post-increment test for ld1x4 loading four <1 x double> vectors.
; %A is advanced by 4 double elements (32 bytes, the total size loaded) and
; the new pointer is stored to %ptr. SelectionDAG is expected to fold the
; increment into the load as a post-index of #32; GlobalISel is expected to
; keep a separate add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
}

; Register post-increment test for ld1x4 loading four <1 x double> vectors.
; %A is advanced by %inc double elements and the new pointer is stored to
; %ptr. SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it
; into a register post-indexed ld1; GlobalISel is expected to keep a separate
; add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr)


; Immediate post-increment test for ld2r returning two <16 x i8> vectors. %A
; is advanced by 2 i8 elements (2 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #2; GlobalISel is expected to keep a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.16b { v0, v1 }, [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; Register post-increment test for ld2r returning two <16 x i8> vectors. %A is
; advanced by %inc i8 elements and the new pointer is stored to %ptr. Since
; elements are one byte, SelectionDAG is expected to use %inc unscaled as the
; post-index register; GlobalISel is expected to keep a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <8 x i8> vectors. %A is
; advanced by 2 i8 elements (2 bytes) and the new pointer is stored to %ptr.
; SelectionDAG is expected to fold the increment into the load as a post-index
; of #2; GlobalISel is expected to keep a separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.8b { v0, v1 }, [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; Register post-increment test for ld2r returning two <8 x i8> vectors. %A is
; advanced by %inc i8 elements and the new pointer is stored to %ptr. Since
; elements are one byte, SelectionDAG is expected to use %inc unscaled as the
; post-index register; GlobalISel is expected to keep a separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.8b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <8 x i16> vectors. %A
; is advanced by 2 i16 elements (4 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #4; GlobalISel is expected to keep a separate add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.8h { v0, v1 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; Register post-increment test for ld2r returning two <8 x i16> vectors. %A is
; advanced by %inc i16 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #1) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld2r.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.8h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <4 x i16> vectors. %A
; is advanced by 2 i16 elements (4 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #4; GlobalISel is expected to keep a separate add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.4h { v0, v1 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; Register post-increment test for ld2r returning two <4 x i16> vectors. %A is
; advanced by %inc i16 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #1) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld2r.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4h { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <4 x i32> vectors. %A
; is advanced by 2 i32 elements (8 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #8; GlobalISel is expected to keep a separate add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; Register post-increment test for ld2r returning two <4 x i32> vectors. %A is
; advanced by %inc i32 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly

; Immediate post-increment test for ld2r returning two <2 x i32> vectors. %A
; is advanced by 2 i32 elements (8 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #8; GlobalISel is expected to keep a separate add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; Register post-increment test for ld2r returning two <2 x i32> vectors. %A is
; advanced by %inc i32 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <2 x i64> vectors. %A
; is advanced by 2 i64 elements (16 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #16; GlobalISel is expected to keep a separate add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; Register post-increment test for ld2r returning two <2 x i64> vectors. %A is
; advanced by %inc i64 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly

; Immediate post-increment test for ld2r returning two <1 x i64> vectors. %A
; is advanced by 2 i64 elements (16 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #16; GlobalISel is expected to keep a separate add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; Register post-increment test for ld2r returning two <1 x i64> vectors. %A is
; advanced by %inc i64 elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <4 x float> vectors.
; %A is advanced by 2 float elements (8 bytes) and the new pointer is stored
; to %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #8; GlobalISel is expected to keep a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; Register post-increment test for ld2r returning two <4 x float> vectors. %A
; is advanced by %inc float elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr) nounwind readonly

; Immediate post-increment test for ld2r returning two <2 x float> vectors.
; %A is advanced by 2 float elements (8 bytes) and the new pointer is stored
; to %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #8; GlobalISel is expected to keep a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

; Register post-increment test for ld2r returning two <2 x float> vectors. %A
; is advanced by %inc float elements and the new pointer is stored to %ptr.
; SelectionDAG is expected to scale %inc to bytes (lsl #2) and fold it into a
; register post-indexed ld2r; GlobalISel is expected to keep a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr) nounwind readonly


; Immediate post-increment test for ld2r returning two <2 x double> vectors.
; %A is advanced by 2 double elements (16 bytes) and the new pointer is stored
; to %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #16; GlobalISel is expected to keep a separate add.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

; Register post-increment test for ld2r returning two <2 x double> vectors.
; %A is advanced by %inc double elements and the new pointer is stored to
; %ptr. SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it
; into a register post-indexed ld2r; GlobalISel is expected to keep a separate
; add.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr) nounwind readonly

; Immediate post-increment test for ld2r returning two <1 x double> vectors.
; %A is advanced by 2 double elements (16 bytes) and the new pointer is stored
; to %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #16; GlobalISel is expected to keep a separate add.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

; Register post-increment test for ld2r returning two <1 x double> vectors.
; %A is advanced by %inc double elements and the new pointer is stored to
; %ptr. SelectionDAG is expected to scale %inc to bytes (lsl #3) and fold it
; into a register post-indexed ld2r; GlobalISel is expected to keep a separate
; add.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld2r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld2r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr) nounwind readonly


; Immediate post-increment test for ld3r returning three <16 x i8> vectors.
; %A is advanced by 3 i8 elements (3 bytes) and the new pointer is stored to
; %ptr. SelectionDAG is expected to fold the increment into the load as a
; post-index of #3; GlobalISel is expected to keep a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], #3
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

; ld3r into <16 x i8> vectors with a variable post-increment: %A + %inc x i8 is
; stored to %ptr. The default llc run is expected to use x2 unscaled as the
; load's post-index register; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwind readonly


; ld3r into <8 x i8> vectors with a constant post-increment: %A + 3 x i8 is
; stored to %ptr. The default llc run is expected to fold the +3 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], #3
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

; ld3r into <8 x i8> vectors with a variable post-increment: %A + %inc x i8 is
; stored to %ptr. The default llc run is expected to use x2 unscaled as the
; load's post-index register; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind readonly


; ld3r into <8 x i16> vectors with a constant post-increment: %A + 3 x i16 is
; stored to %ptr. The default llc run is expected to fold the +6 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], #6
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #6
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

; ld3r into <8 x i16> vectors with a variable post-increment: %A + %inc x i16
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #1
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwind readonly


; ld3r into <4 x i16> vectors with a constant post-increment: %A + 3 x i16 is
; stored to %ptr. The default llc run is expected to fold the +6 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], #6
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #6
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

; ld3r into <4 x i16> vectors with a variable post-increment: %A + %inc x i16
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #1
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwind readonly


; ld3r into <4 x i32> vectors with a constant post-increment: %A + 3 x i32 is
; stored to %ptr. The default llc run is expected to fold the +12 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

; ld3r into <4 x i32> vectors with a variable post-increment: %A + %inc x i32
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #2
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly

; ld3r into <2 x i32> vectors with a constant post-increment: %A + 3 x i32 is
; stored to %ptr. The default llc run is expected to fold the +12 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

; ld3r into <2 x i32> vectors with a variable post-increment: %A + %inc x i32
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #2
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwind readonly


; ld3r into <2 x i64> vectors with a constant post-increment: %A + 3 x i64 is
; stored to %ptr. The default llc run is expected to fold the +24 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

; ld3r into <2 x i64> vectors with a variable post-increment: %A + %inc x i64
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #3
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly

; ld3r into <1 x i64> vectors with a constant post-increment: %A + 3 x i64 is
; stored to %ptr. The default llc run is expected to fold the +24 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

; ld3r into <1 x i64> vectors with a variable post-increment: %A + %inc x i64
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #3
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwind readonly


; ld3r into <4 x float> vectors with a constant post-increment: %A + 3 x float
; is stored to %ptr. The default llc run is expected to fold the +12 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

; ld3r into <4 x float> vectors with a variable post-increment: %A + %inc x
; float is stored to %ptr. The default llc run is expected to scale x2 with
; lsl #2 and use it as the load's post-index register; the -global-isel=1 run
; is expected to emit a plain load and an add with the shift folded in.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr) nounwind readonly

; ld3r into <2 x float> vectors with a constant post-increment: %A + 3 x float
; is stored to %ptr. The default llc run is expected to fold the +12 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

; ld3r into <2 x float> vectors with a variable post-increment: %A + %inc x
; float is stored to %ptr. The default llc run is expected to scale x2 with
; lsl #2 and use it as the load's post-index register; the -global-isel=1 run
; is expected to emit a plain load and an add with the shift folded in.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr) nounwind readonly


; ld3r into <2 x double> vectors with a constant post-increment: %A + 3 x
; double is stored to %ptr. The default llc run is expected to fold the +24
; into the load's post-index immediate; the -global-isel=1 run is expected to
; emit a plain load followed by a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

; ld3r into <2 x double> vectors with a variable post-increment: %A + %inc x
; double is stored to %ptr. The default llc run is expected to scale x2 with
; lsl #3 and use it as the load's post-index register; the -global-isel=1 run
; is expected to emit a plain load and an add with the shift folded in.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr) nounwind readonly

; ld3r into <1 x double> vectors with a constant post-increment: %A + 3 x
; double is stored to %ptr. The default llc run is expected to fold the +24
; into the load's post-index immediate; the -global-isel=1 run is expected to
; emit a plain load followed by a separate add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

; ld3r into <1 x double> vectors with a variable post-increment: %A + %inc x
; double is stored to %ptr. The default llc run is expected to scale x2 with
; lsl #3 and use it as the load's post-index register; the -global-isel=1 run
; is expected to emit a plain load and an add with the shift folded in.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld3r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld3r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr) nounwind readonly


; ld4r into <16 x i8> vectors with a constant post-increment: %A + 4 x i8 is
; stored to %ptr. The default llc run is expected to fold the +4 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

; ld4r into <16 x i8> vectors with a variable post-increment: %A + %inc x i8 is
; stored to %ptr. The default llc run is expected to use x2 unscaled as the
; load's post-index register; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly


; ld4r into <8 x i8> vectors with a constant post-increment: %A + 4 x i8 is
; stored to %ptr. The default llc run is expected to fold the +4 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

; ld4r into <8 x i8> vectors with a variable post-increment: %A + %inc x i8 is
; stored to %ptr. The default llc run is expected to use x2 unscaled as the
; load's post-index register; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly


; ld4r into <8 x i16> vectors with a constant post-increment: %A + 4 x i16 is
; stored to %ptr. The default llc run is expected to fold the +8 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

; ld4r into <8 x i16> vectors with a variable post-increment: %A + %inc x i16
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #1
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly


; ld4r into <4 x i16> vectors with a constant post-increment: %A + 4 x i16 is
; stored to %ptr. The default llc run is expected to fold the +8 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

; ld4r into <4 x i16> vectors with a variable post-increment: %A + %inc x i16
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #1
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly


; ld4r into <4 x i32> vectors with a constant post-increment: %A + 4 x i32 is
; stored to %ptr. The default llc run is expected to fold the +16 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

; ld4r into <4 x i32> vectors with a variable post-increment: %A + %inc x i32
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #2
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly

; ld4r into <2 x i32> vectors with a constant post-increment: %A + 4 x i32 is
; stored to %ptr. The default llc run is expected to fold the +16 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

; ld4r into <2 x i32> vectors with a variable post-increment: %A + %inc x i32
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #2
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly


; ld4r into <2 x i64> vectors with a constant post-increment: %A + 4 x i64 is
; stored to %ptr. The default llc run is expected to fold the +32 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

; ld4r into <2 x i64> vectors with a variable post-increment: %A + %inc x i64
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #3
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly

; ld4r into <1 x i64> vectors with a constant post-increment: %A + 4 x i64 is
; stored to %ptr. The default llc run is expected to fold the +32 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

; ld4r into <1 x i64> vectors with a variable post-increment: %A + %inc x i64
; is stored to %ptr. The default llc run is expected to scale x2 with lsl #3
; and use it as the load's post-index register; the -global-isel=1 run is
; expected to emit a plain load and an add with the shift folded in.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly


; ld4r into <4 x float> vectors with a constant post-increment: %A + 4 x float
; is stored to %ptr. The default llc run is expected to fold the +16 into the
; load's post-index immediate; the -global-isel=1 run is expected to emit a
; plain load followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

; Post-increment ld4r of <4 x float> with a run-time stride of %inc x float;
; the new address is stored through %ptr. The SelectionDAG run expects the
; stride scaled with "lsl #2" and used as a register post-index; the GlobalISel
; run expects a plain ld4r.4s plus a separate add with a shifted operand.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

; NEON ld4r intrinsic, <4 x float> variant: reads through a pointer and yields four vectors.
declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr) nounwind readonly

; Post-increment ld4r of <2 x float> with a constant stride: the pointer is
; advanced by 4 x float (16 bytes) and the new address is stored through %ptr.
; The SelectionDAG run expects the increment folded into a post-indexed
; ld4r.2s ("#16"); the GlobalISel run expects a plain ld4r.2s plus a separate add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

; Post-increment ld4r of <2 x float> with a run-time stride of %inc x float;
; the new address is stored through %ptr. The SelectionDAG run expects the
; stride scaled with "lsl #2" and used as a register post-index; the GlobalISel
; run expects a plain ld4r.2s plus a separate add with a shifted operand.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

; NEON ld4r intrinsic, <2 x float> variant: reads through a pointer and yields four vectors.
declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr) nounwind readonly


; Post-increment ld4r of <2 x double> with a constant stride: the pointer is
; advanced by 4 x double (32 bytes) and the new address is stored through %ptr.
; The SelectionDAG run expects the increment folded into a post-indexed
; ld4r.2d ("#32"); the GlobalISel run expects a plain ld4r.2d plus a separate add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

; Post-increment ld4r of <2 x double> with a run-time stride of %inc x double;
; the new address is stored through %ptr. The SelectionDAG run expects the
; stride scaled with "lsl #3" and used as a register post-index; the GlobalISel
; run expects a plain ld4r.2d plus a separate add with a shifted operand.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

; NEON ld4r intrinsic, <2 x double> variant: reads through a pointer and yields four vectors.
declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr) nounwind readonly

; Post-increment ld4r of <1 x double> with a constant stride: the pointer is
; advanced by 4 x double (32 bytes) and the new address is stored through %ptr.
; The SelectionDAG run expects the increment folded into a post-indexed
; ld4r.1d ("#32"); the GlobalISel run expects a plain ld4r.1d plus a separate add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

; Post-increment ld4r of <1 x double> with a run-time stride of %inc x double;
; the new address is stored through %ptr. The SelectionDAG run expects the
; stride scaled with "lsl #3" and used as a register post-index; the GlobalISel
; run expects a plain ld4r.1d plus a separate add with a shifted operand.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld4r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld4r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

; NEON ld4r intrinsic, <1 x double> variant: reads through a pointer and yields four vectors.
declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <16 x i8> %B/%C with a constant stride
; of 2 x i8 (2 bytes); the new address is stored through %ptr. The SelectionDAG
; run expects a post-indexed ld2.b ("#2"); the GlobalISel run expects a plain
; ld2.b and a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; Post-increment ld2lane (lane 0) into <16 x i8> %B/%C with a run-time stride
; of %inc x i8; the new address is stored through %ptr. The SelectionDAG run
; expects x2 used unscaled as the register post-index of ld2.b; the GlobalISel
; run expects a plain ld2.b and a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

; NEON ld2lane intrinsic, <16 x i8> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <8 x i8> %B/%C with a constant stride
; of 2 x i8 (2 bytes); the new address is stored through %ptr. The SelectionDAG
; run expects a post-indexed ld2.b ("#2"); the GlobalISel run expects a plain
; ld2.b and a separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; Post-increment ld2lane (lane 0) into <8 x i8> %B/%C with a run-time stride
; of %inc x i8; the new address is stored through %ptr. The SelectionDAG run
; expects x2 used unscaled as the register post-index of ld2.b; the GlobalISel
; run expects a plain ld2.b and a separate add.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

; NEON ld2lane intrinsic, <8 x i8> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <8 x i16> %B/%C with a constant stride
; of 2 x i16 (4 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.h ("#4"); the GlobalISel run
; expects a plain ld2.h and a separate add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; Post-increment ld2lane (lane 0) into <8 x i16> %B/%C with a run-time stride
; of %inc x i16; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #1" and used as a register post-index;
; the GlobalISel run expects a plain ld2.h and a separate shifted add.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

; NEON ld2lane intrinsic, <8 x i16> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <4 x i16> %B/%C with a constant stride
; of 2 x i16 (4 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.h ("#4"); the GlobalISel run
; expects a plain ld2.h and a separate add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; Post-increment ld2lane (lane 0) into <4 x i16> %B/%C with a run-time stride
; of %inc x i16; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #1" and used as a register post-index;
; the GlobalISel run expects a plain ld2.h and a separate shifted add.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

; NEON ld2lane intrinsic, <4 x i16> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <4 x i32> %B/%C with a constant stride
; of 2 x i32 (8 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.s ("#8"); the GlobalISel run
; expects a plain ld2.s and a separate add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; Post-increment ld2lane (lane 0) into <4 x i32> %B/%C with a run-time stride
; of %inc x i32; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #2" and used as a register post-index;
; the GlobalISel run expects a plain ld2.s and a separate shifted add.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

; NEON ld2lane intrinsic, <4 x i32> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <2 x i32> %B/%C with a constant stride
; of 2 x i32 (8 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.s ("#8"); the GlobalISel run
; expects a plain ld2.s and a separate add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; Post-increment ld2lane (lane 0) into <2 x i32> %B/%C with a run-time stride
; of %inc x i32; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #2" and used as a register post-index;
; the GlobalISel run expects a plain ld2.s and a separate shifted add.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

; NEON ld2lane intrinsic, <2 x i32> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <2 x i64> %B/%C with a constant stride
; of 2 x i64 (16 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.d ("#16"); the GlobalISel run
; expects a plain ld2.d and a separate add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; Post-increment ld2lane (lane 0) into <2 x i64> %B/%C with a run-time stride
; of %inc x i64; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #3" and used as a register post-index;
; the GlobalISel run expects a plain ld2.d and a separate shifted add.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

; NEON ld2lane intrinsic, <2 x i64> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <1 x i64> %B/%C with a constant stride
; of 2 x i64 (16 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.d ("#16"); the GlobalISel run
; expects a plain ld2.d and a separate add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; Post-increment ld2lane (lane 0) into <1 x i64> %B/%C with a run-time stride
; of %inc x i64; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #3" and used as a register post-index;
; the GlobalISel run expects a plain ld2.d and a separate shifted add.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

; NEON ld2lane intrinsic, <1 x i64> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <4 x float> %B/%C with a constant stride
; of 2 x float (8 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.s ("#8"); the GlobalISel run
; expects a plain ld2.s and a separate add.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; Post-increment ld2lane (lane 0) into <4 x float> %B/%C with a run-time stride
; of %inc x float; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #2" and used as a register post-index;
; the GlobalISel run expects a plain ld2.s and a separate shifted add.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

; NEON ld2lane intrinsic, <4 x float> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float>, <4 x float>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <2 x float> %B/%C with a constant stride
; of 2 x float (8 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.s ("#8"); the GlobalISel run
; expects a plain ld2.s and a separate add.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

; Post-increment ld2lane (lane 0) into <2 x float> %B/%C with a run-time stride
; of %inc x float; the new address is stored through %ptr. The SelectionDAG run
; expects the stride scaled with "lsl #2" and used as a register post-index;
; the GlobalISel run expects a plain ld2.s and a separate shifted add.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

; NEON ld2lane intrinsic, <2 x float> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float>, <2 x float>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <2 x double> %B/%C with a constant
; stride of 2 x double (16 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.d ("#16"); the GlobalISel run
; expects a plain ld2.d and a separate add.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

; Post-increment ld2lane (lane 0) into <2 x double> %B/%C with a run-time
; stride of %inc x double; the new address is stored through %ptr. The
; SelectionDAG run expects the stride scaled with "lsl #3" and used as a
; register post-index; the GlobalISel run expects a plain ld2.d and a separate
; shifted add.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

; NEON ld2lane intrinsic, <2 x double> variant: takes two input vectors, an i64 lane index and a pointer; yields two vectors.
declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double>, <2 x double>, i64, ptr) nounwind readonly


; Post-increment ld2lane (lane 0) into <1 x double> %B/%C with a constant
; stride of 2 x double (16 bytes); the new address is stored through %ptr. The
; SelectionDAG run expects a post-indexed ld2.d ("#16"); the GlobalISel run
; expects a plain ld2.d and a separate add.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i32 2
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

; Register post-increment, ld2lane on <1 x double>: lane 0 of two vectors is
; loaded from %A, and %A advanced by %inc doubles is stored to %ptr.
; Expected output: the default llc run scales %inc by 8 (lsl #3) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double>, <1 x double>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <16 x i8>: lane 0 of three vectors is
; loaded from %A, and %A + 3 bytes (3 x i8) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#3" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

; Register post-increment, ld3lane on <16 x i8>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc bytes is stored to %ptr.
; Expected output: the default llc run uses %inc (x2) directly as the load's
; writeback register, with no scaling since the element is one byte; the
; -global-isel=1 run keeps a separate add and a load without writeback.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <8 x i8>: lane 0 of three vectors is
; loaded from %A, and %A + 3 bytes (3 x i8) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#3" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

; Register post-increment, ld3lane on <8 x i8>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc bytes is stored to %ptr.
; Expected output: the default llc run uses %inc (x2) directly as the load's
; writeback register, with no scaling since the element is one byte; the
; -global-isel=1 run keeps a separate add and a load without writeback.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <8 x i16>: lane 0 of three vectors is
; loaded from %A, and %A + 6 bytes (3 x i16) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#6" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #6
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

; Register post-increment, ld3lane on <8 x i16>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i16 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 2 (lsl #1) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <4 x i16>: lane 0 of three vectors is
; loaded from %A, and %A + 6 bytes (3 x i16) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#6" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #6
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

; Register post-increment, ld3lane on <4 x i16>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i16 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 2 (lsl #1) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <4 x i32>: lane 0 of three vectors is
; loaded from %A, and %A + 12 bytes (3 x i32) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#12" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

; Register post-increment, ld3lane on <4 x i32>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i32 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 4 (lsl #2) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <2 x i32>: lane 0 of three vectors is
; loaded from %A, and %A + 12 bytes (3 x i32) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#12" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

; Register post-increment, ld3lane on <2 x i32>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i32 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 4 (lsl #2) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <2 x i64>: lane 0 of three vectors is
; loaded from %A, and %A + 24 bytes (3 x i64) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#24" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

; Register post-increment, ld3lane on <2 x i64>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i64 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 8 (lsl #3) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <1 x i64>: lane 0 of three vectors is
; loaded from %A, and %A + 24 bytes (3 x i64) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#24" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

; Register post-increment, ld3lane on <1 x i64>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc i64 elements is stored to %ptr.
; Expected output: the default llc run scales %inc by 8 (lsl #3) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <4 x float>: lane 0 of three vectors is
; loaded from %A, and %A + 12 bytes (3 x float) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#12" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

; Register post-increment, ld3lane on <4 x float>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc floats is stored to %ptr.
; Expected output: the default llc run scales %inc by 4 (lsl #2) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <2 x float>: lane 0 of three vectors is
; loaded from %A, and %A + 12 bytes (3 x float) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#12" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #12
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

; Register post-increment, ld3lane on <2 x float>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc floats is stored to %ptr.
; Expected output: the default llc run scales %inc by 4 (lsl #2) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, i64, ptr) nounwind readonly


; Immediate post-increment, ld3lane on <2 x double>: lane 0 of three vectors is
; loaded from %A, and %A + 24 bytes (3 x double) is stored to %ptr.
; Expected output: the default llc run folds the increment into the load as a
; "#24" writeback; the -global-isel=1 run keeps a separate add and a load
; without writeback.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

; Register post-increment, ld3lane on <2 x double>: lane 0 of three vectors is
; loaded from %A, and %A advanced by %inc doubles is stored to %ptr.
; Expected output: the default llc run scales %inc by 8 (lsl #3) and uses it as
; the load's writeback register; the -global-isel=1 run keeps a separate add and
; a load without writeback.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, i64, ptr) nounwind readonly


; Lane-0 ld3lane on <1 x double> vectors via %A; %A + 3 doubles (24 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld3.d with immediate #24, while GlobalISel is expected to emit
; a separate add and an ld3.d without writeback.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  ; Updated pointer: 3 elements of 8 bytes, i.e. the 24 bytes one ld3.d lane access covers.
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

; Lane-0 ld3lane on <1 x double> vectors via %A; %A advanced by %inc doubles
; is then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #3
; and fold it into a register post-indexed ld3.d, while GlobalISel is expected
; to emit a separate add and an ld3.d without writeback.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 8 bytes each.
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <16 x i8> vectors via %A; %A + 4 bytes is then stored to
; %ptr. SelectionDAG is expected to fold the increment into a post-indexed
; ld4.b with immediate #4, while GlobalISel is expected to emit a separate add
; and an ld4.b without writeback.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 1 byte, i.e. the 4 bytes one ld4.b lane access covers.
  %tmp = getelementptr i8, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

; Lane-0 ld4lane on <16 x i8> vectors via %A; %A advanced by %inc bytes is
; then stored to %ptr. Because the element size is one byte no scaling is
; needed: SelectionDAG is expected to use x2 directly as the post-index
; register of ld4.b, while GlobalISel is expected to emit a separate add and
; an ld4.b without writeback.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 1 byte each.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <8 x i8> vectors via %A; %A + 4 bytes is then stored to
; %ptr. SelectionDAG is expected to fold the increment into a post-indexed
; ld4.b with immediate #4, while GlobalISel is expected to emit a separate add
; and an ld4.b without writeback.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #4
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 1 byte, i.e. the 4 bytes one ld4.b lane access covers.
  %tmp = getelementptr i8, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

; Lane-0 ld4lane on <8 x i8> vectors via %A; %A advanced by %inc bytes is then
; stored to %ptr. Because the element size is one byte no scaling is needed:
; SelectionDAG is expected to use x2 directly as the post-index register of
; ld4.b, while GlobalISel is expected to emit a separate add and an ld4.b
; without writeback.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 1 byte each.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <8 x i16> vectors via %A; %A + 4 halfwords (8 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.h with immediate #8, while GlobalISel is expected to emit a
; separate add and an ld4.h without writeback.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 2 bytes, i.e. the 8 bytes one ld4.h lane access covers.
  %tmp = getelementptr i16, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

; Lane-0 ld4lane on <8 x i16> vectors via %A; %A advanced by %inc halfwords is
; then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #1 and
; fold it into a register post-indexed ld4.h, while GlobalISel is expected to
; emit a separate add and an ld4.h without writeback.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 2 bytes each.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <4 x i16> vectors via %A; %A + 4 halfwords (8 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.h with immediate #8, while GlobalISel is expected to emit a
; separate add and an ld4.h without writeback.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 2 bytes, i.e. the 8 bytes one ld4.h lane access covers.
  %tmp = getelementptr i16, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

; Lane-0 ld4lane on <4 x i16> vectors via %A; %A advanced by %inc halfwords is
; then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #1 and
; fold it into a register post-indexed ld4.h, while GlobalISel is expected to
; emit a separate add and an ld4.h without writeback.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 2 bytes each.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <4 x i32> vectors via %A; %A + 4 words (16 bytes) is then
; stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.s with immediate #16, while GlobalISel is expected to emit
; a separate add and an ld4.s without writeback.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 4 bytes, i.e. the 16 bytes one ld4.s lane access covers.
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

; Lane-0 ld4lane on <4 x i32> vectors via %A; %A advanced by %inc words is
; then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #2 and
; fold it into a register post-indexed ld4.s, while GlobalISel is expected to
; emit a separate add and an ld4.s without writeback.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 4 bytes each.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <2 x i32> vectors via %A; %A + 4 words (16 bytes) is then
; stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.s with immediate #16, while GlobalISel is expected to emit
; a separate add and an ld4.s without writeback.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 4 bytes, i.e. the 16 bytes one ld4.s lane access covers.
  %tmp = getelementptr i32, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

; Lane-0 ld4lane on <2 x i32> vectors via %A; %A advanced by %inc words is
; then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #2 and
; fold it into a register post-indexed ld4.s, while GlobalISel is expected to
; emit a separate add and an ld4.s without writeback.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 4 bytes each.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <2 x i64> vectors via %A; %A + 4 doublewords (32 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.d with immediate #32, while GlobalISel is expected to emit
; a separate add and an ld4.d without writeback.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 8 bytes, i.e. the 32 bytes one ld4.d lane access covers.
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

; Lane-0 ld4lane on <2 x i64> vectors via %A; %A advanced by %inc doublewords
; is then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #3
; and fold it into a register post-indexed ld4.d, while GlobalISel is expected
; to emit a separate add and an ld4.d without writeback.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 8 bytes each.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <1 x i64> vectors via %A; %A + 4 doublewords (32 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.d with immediate #32, while GlobalISel is expected to emit
; a separate add and an ld4.d without writeback.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 8 bytes, i.e. the 32 bytes one ld4.d lane access covers.
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

; Lane-0 ld4lane on <1 x i64> vectors via %A; %A advanced by %inc doublewords
; is then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #3
; and fold it into a register post-indexed ld4.d, while GlobalISel is expected
; to emit a separate add and an ld4.d without writeback.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 8 bytes each.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, ptr) nounwind readonly


; Lane-0 ld4lane on <4 x float> vectors via %A; %A + 4 floats (16 bytes) is
; then stored to %ptr. SelectionDAG is expected to fold the increment into a
; post-indexed ld4.s with immediate #16, while GlobalISel is expected to emit
; a separate add and an ld4.s without writeback.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  ; Updated pointer: 4 elements of 4 bytes, i.e. the 16 bytes one ld4.s lane access covers.
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

; Lane-0 ld4lane on <4 x float> vectors via %A; %A advanced by %inc floats is
; then stored to %ptr. SelectionDAG is expected to scale %inc with lsl #2 and
; fold it into a register post-indexed ld4.s, while GlobalISel is expected to
; emit a separate add and an ld4.s without writeback.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  ; Updated pointer: %A plus %inc elements of 4 bytes each.
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}

declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, ptr) nounwind readonly


; Post-increment ld4lane (lane 0) into four <2 x float> vectors with a
; constant increment. The IR loads through %A, then stores %A advanced by
; 4 floats (16 bytes) to %ptr.
; SD expectations (default llc run) use the post-indexed immediate form
; (#16) and store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

; Post-increment ld4lane (lane 0) into four <2 x float> vectors with a
; register increment. The IR loads through %A, then stores %A advanced by
; %inc floats to %ptr.
; SD expectations (default llc run) scale x2 by 4 (lsl #2) into x8 and use it
; as the post-index register of the ld4, then store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, ptr) nounwind readonly


; Post-increment ld4lane (lane 0) into four <2 x double> vectors with a
; constant increment. The IR loads through %A, then stores %A advanced by
; 4 doubles (32 bytes) to %ptr.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32) and store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

; Post-increment ld4lane (lane 0) into four <2 x double> vectors with a
; register increment. The IR loads through %A, then stores %A advanced by
; %inc doubles to %ptr.
; SD expectations (default llc run) scale x2 by 8 (lsl #3) into x8 and use it
; as the post-index register of the ld4, then store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, ptr) nounwind readonly


; Post-increment ld4lane (lane 0) into four <1 x double> vectors with a
; constant increment. The IR loads through %A, then stores %A advanced by
; 4 doubles (32 bytes) to %ptr.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32) and store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

; Post-increment ld4lane (lane 0) into four <1 x double> vectors with a
; register increment. The IR loads through %A, then stores %A advanced by
; %inc doubles to %ptr.
; SD expectations (default llc run) scale x2 by 8 (lsl #3) into x8 and use it
; as the post-index register of the ld4, then store the written-back x0.
; GI expectations (-global-isel=1 run) compute the new address with a
; separate add and use a plain ld4 without writeback.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, ptr) nounwind readonly


; Post-increment st2 of two <16 x i8> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 32 bytes; %ptr is
; not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  ret ptr %tmp
}

; Post-increment st2 of two <16 x i8> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc bytes; %ptr is
; not referenced in the body.
; SD expectations (default llc run) use x2 directly as the post-index
; register (byte elements, so no scaling shift appears).
; GI expectations (-global-isel=1 run) copy %A to x8, add x2 into x0 and
; store through x8 without writeback.
define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)


; Post-increment st2 of two <8 x i8> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 16 bytes; %ptr is
; not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#16), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0 and
; store through x8 without writeback.
define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 16
  ret ptr %tmp
}

; Post-increment st2 of two <8 x i8> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc bytes; %ptr is
; not referenced in the body.
; SD expectations (default llc run) use x2 directly as the post-index
; register (byte elements, so no scaling shift appears).
; GI expectations (-global-isel=1 run) copy %A to x8, add x2 into x0 and
; store through x8 without writeback.
define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)


; Post-increment st2 of two <8 x i16> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 16 i16 elements
; (32 bytes); %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  ret ptr %tmp
}

; Post-increment st2 of two <8 x i16> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i16 elements;
; %ptr is not referenced in the body.
; SD expectations (default llc run) scale x2 by 2 (lsl #1) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)


; Post-increment st2 of two <4 x i16> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 8 i16 elements
; (16 bytes); %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#16), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0 and
; store through x8 without writeback.
define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 8
  ret ptr %tmp
}

; Post-increment st2 of two <4 x i16> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i16 elements;
; %ptr is not referenced in the body.
; SD expectations (default llc run) scale x2 by 2 (lsl #1) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)


; Post-increment st2 of two <4 x i32> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 8 i32 elements
; (32 bytes); %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  ret ptr %tmp
}

; Post-increment st2 of two <4 x i32> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i32 elements;
; %ptr is not referenced in the body.
; SD expectations (default llc run) scale x2 by 4 (lsl #2) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)


; Post-increment st2 of two <2 x i32> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 4 i32 elements
; (16 bytes); %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#16), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0 and
; store through x8 without writeback.
define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  ret ptr %tmp
}

; Post-increment st2 of two <2 x i32> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i32 elements;
; %ptr is not referenced in the body.
; SD expectations (default llc run) scale x2 by 4 (lsl #2) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)


; Post-increment st2 of two <2 x i64> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 4 i64 elements
; (32 bytes); %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; Post-increment st2 of two <2 x i64> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i64 elements;
; %ptr is not referenced in the body.
; SD expectations (default llc run) scale x2 by 8 (lsl #3) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)


; Post-increment st2 of two <1 x i64> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 2 i64 elements
; (16 bytes); %ptr is not referenced in the body.
; Both runs expect a two-register st1.1d rather than an st2 mnemonic
; (presumably because st2 has no .1d arrangement - TODO confirm).
; SD expectations (default llc run) use the post-indexed immediate form
; (#16); GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0
; and store through x8 without writeback.
define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 2
  ret ptr %tmp
}

; Post-increment st2 of two <1 x i64> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc i64 elements;
; %ptr is not referenced in the body.
; Both runs expect a two-register st1.1d rather than an st2 mnemonic
; (presumably because st2 has no .1d arrangement - TODO confirm).
; SD expectations (default llc run) scale x2 by 8 (lsl #3) into x8 and use
; it as the post-index register; GI expectations (-global-isel=1 run) copy
; %A to x8, fold the scaling into an add that produces x0 and store through
; x8 without writeback.
define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)


; Post-increment st2 of two <4 x float> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 8 floats (32 bytes);
; %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  ret ptr %tmp
}

; Post-increment st2 of two <4 x float> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc floats; %ptr is
; not referenced in the body.
; SD expectations (default llc run) scale x2 by 4 (lsl #2) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr)


; Post-increment st2 of two <2 x float> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 4 floats (16 bytes);
; %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#16), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0 and
; store through x8 without writeback.
define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  ret ptr %tmp
}

; Post-increment st2 of two <2 x float> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc floats; %ptr is
; not referenced in the body.
; SD expectations (default llc run) scale x2 by 4 (lsl #2) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr)


; Post-increment st2 of two <2 x double> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 4 doubles (32 bytes);
; %ptr is not referenced in the body.
; SD expectations (default llc run) use the post-indexed immediate form
; (#32), leaving the result in x0.
; GI expectations (-global-isel=1 run) copy %A to x8, add 32 into x0 and
; store through x8 without writeback.
define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; Post-increment st2 of two <2 x double> vectors with a register increment.
; The IR stores through %A and returns %A advanced by %inc doubles; %ptr is
; not referenced in the body.
; SD expectations (default llc run) scale x2 by 8 (lsl #3) into x8 and use
; it as the post-index register.
; GI expectations (-global-isel=1 run) copy %A to x8, fold the scaling into
; an add that produces x0 and store through x8 without writeback.
define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr)


; Post-increment st2 of two <1 x double> vectors with a constant increment.
; The IR stores through %A and returns %A advanced by 2 doubles (16 bytes);
; %ptr is not referenced in the body.
; Both runs expect a two-register st1.1d rather than an st2 mnemonic
; (presumably because st2 has no .1d arrangement - TODO confirm).
; SD expectations (default llc run) use the post-indexed immediate form
; (#16); GI expectations (-global-isel=1 run) copy %A to x8, add 16 into x0
; and store through x8 without writeback.
define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 2
  ret ptr %tmp
}

; st2 of two <1 x double> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc doubles. The expected instruction is
; the two-register st1.1d form. The SelectionDAG run expects %inc scaled to
; bytes (lsl #3) and used as the post-index register; the GlobalISel run
; expects a separate add and a plain st1.1d. %ptr is unused.
define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr)


; st3 of three <16 x i8> vectors to %A followed by returning %A advanced by
; 48 bytes (the total size stored). The SelectionDAG run expects the increment
; folded into a post-indexed st3.16b with immediate #48; the GlobalISel run
; expects a separate add and a plain st3.16b through a copy of the original
; pointer. %ptr is unused.
define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 48
  ret ptr %tmp
}

; st3 of three <16 x i8> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc bytes. Because the element is one byte,
; the SelectionDAG run expects %inc (x2) used directly, unscaled, as the
; post-index register of st3.16b; the GlobalISel run expects a separate add and
; a plain st3.16b. %ptr is unused.
define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr)


; st3 of three <8 x i8> vectors to %A followed by returning %A advanced by
; 24 bytes (the total size stored). The SelectionDAG run expects the increment
; folded into a post-indexed st3.8b with immediate #24; the GlobalISel run
; expects a separate add and a plain st3.8b. %ptr is unused.
define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 24
  ret ptr %tmp
}

; st3 of three <8 x i8> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc bytes. The SelectionDAG run expects
; %inc (x2) used unscaled as the post-index register of st3.8b; the GlobalISel
; run expects a separate add and a plain st3.8b. %ptr is unused.
define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)


; st3 of three <8 x i16> vectors to %A followed by returning %A advanced by
; 24 i16 elements (48 bytes, the total size stored). The SelectionDAG run
; expects the increment folded into a post-indexed st3.8h with immediate #48;
; the GlobalISel run expects a separate add and a plain st3.8h. %ptr is unused.
define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 24
  ret ptr %tmp
}

; st3 of three <8 x i16> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i16 elements. The SelectionDAG run
; expects %inc scaled to bytes (lsl #1) and used as the post-index register of
; st3.8h; the GlobalISel run expects a separate add (with the shift folded in)
; and a plain st3.8h. %ptr is unused.
define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr)


; st3 of three <4 x i16> vectors to %A followed by returning %A advanced by
; 12 i16 elements (24 bytes, the total size stored). The SelectionDAG run
; expects the increment folded into a post-indexed st3.4h with immediate #24;
; the GlobalISel run expects a separate add and a plain st3.4h. %ptr is unused.
define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 12
  ret ptr %tmp
}

; st3 of three <4 x i16> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i16 elements. The SelectionDAG run
; expects %inc scaled to bytes (lsl #1) and used as the post-index register of
; st3.4h; the GlobalISel run expects a separate add (with the shift folded in)
; and a plain st3.4h. %ptr is unused.
define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr)


; st3 of three <4 x i32> vectors to %A followed by returning %A advanced by
; 12 i32 elements (48 bytes, the total size stored). The SelectionDAG run
; expects the increment folded into a post-indexed st3.4s with immediate #48;
; the GlobalISel run expects a separate add and a plain st3.4s. %ptr is unused.
define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 12
  ret ptr %tmp
}

; st3 of three <4 x i32> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i32 elements. The SelectionDAG run
; expects %inc scaled to bytes (lsl #2) and used as the post-index register of
; st3.4s; the GlobalISel run expects a separate add (with the shift folded in)
; and a plain st3.4s. %ptr is unused.
define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr)


; st3 of three <2 x i32> vectors to %A followed by returning %A advanced by
; 6 i32 elements (24 bytes, the total size stored). The SelectionDAG run
; expects the increment folded into a post-indexed st3.2s with immediate #24;
; the GlobalISel run expects a separate add and a plain st3.2s. %ptr is unused.
define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 6
  ret ptr %tmp
}

; st3 of three <2 x i32> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i32 elements. The SelectionDAG run
; expects %inc scaled to bytes (lsl #2) and used as the post-index register of
; st3.2s; the GlobalISel run expects a separate add (with the shift folded in)
; and a plain st3.2s. %ptr is unused.
define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr)


; st3 of three <2 x i64> vectors to %A followed by returning %A advanced by
; 6 i64 elements (48 bytes, the total size stored). The SelectionDAG run
; expects the increment folded into a post-indexed st3.2d with immediate #48;
; the GlobalISel run expects a separate add and a plain st3.2d. %ptr is unused.
define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 6
  ret ptr %tmp
}

; st3 of three <2 x i64> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i64 elements. The SelectionDAG run
; expects %inc scaled to bytes (lsl #3) and used as the post-index register of
; st3.2d; the GlobalISel run expects a separate add (with the shift folded in)
; and a plain st3.2d. %ptr is unused.
define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr)


; st3 of three <1 x i64> vectors to %A followed by returning %A advanced by
; 3 i64 elements (24 bytes, the total size stored). For this single-element
; type the expected instruction is the three-register st1.1d form. The
; SelectionDAG run expects the increment folded in as a post-index immediate
; #24; the GlobalISel run expects a separate add and a plain st1.1d. %ptr is
; unused.
define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 3
  ret ptr %tmp
}

; st3 of three <1 x i64> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc i64 elements. The expected instruction
; is the three-register st1.1d form. The SelectionDAG run expects %inc scaled
; to bytes (lsl #3) and used as the post-index register; the GlobalISel run
; expects a separate add (with the shift folded in) and a plain st1.1d. %ptr
; is unused.
define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr)


; st3 of three <4 x float> vectors to %A followed by returning %A advanced by
; 12 floats (48 bytes, the total size stored). The SelectionDAG run expects the
; increment folded into a post-indexed st3.4s with immediate #48; the
; GlobalISel run expects a separate add and a plain st3.4s. %ptr is unused.
define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 12
  ret ptr %tmp
}

; st3 of three <4 x float> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc floats. The SelectionDAG run expects
; %inc scaled to bytes (lsl #2) and used as the post-index register of st3.4s;
; the GlobalISel run expects a separate add (with the shift folded in) and a
; plain st3.4s. %ptr is unused.
define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr)


; st3 of three <2 x float> vectors to %A followed by returning %A advanced by
; 6 floats (24 bytes, the total size stored). The SelectionDAG run expects the
; increment folded into a post-indexed st3.2s with immediate #24; the
; GlobalISel run expects a separate add and a plain st3.2s. %ptr is unused.
define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 6
  ret ptr %tmp
}

; st3 of three <2 x float> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc floats. The SelectionDAG run expects
; %inc scaled to bytes (lsl #2) and used as the post-index register of st3.2s;
; the GlobalISel run expects a separate add (with the shift folded in) and a
; plain st3.2s. %ptr is unused.
define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr)


; st3 of three <2 x double> vectors to %A followed by returning %A advanced by
; 6 doubles (48 bytes, the total size stored). The SelectionDAG run expects the
; increment folded into a post-indexed st3.2d with immediate #48; the
; GlobalISel run expects a separate add and a plain st3.2d. %ptr is unused.
define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 6
  ret ptr %tmp
}

; st3 of three <2 x double> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc doubles. The SelectionDAG run expects
; %inc scaled to bytes (lsl #3) and used as the post-index register of st3.2d;
; the GlobalISel run expects a separate add (with the shift folded in) and a
; plain st3.2d. %ptr is unused.
define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr)


; st3 of three <1 x double> vectors to %A followed by returning %A advanced by
; 3 doubles (24 bytes, the total size stored). For this single-element type the
; expected instruction is the three-register st1.1d form. The SelectionDAG run
; expects the increment folded in as a post-index immediate #24; the GlobalISel
; run expects a separate add and a plain st1.1d. %ptr is unused.
define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 3
  ret ptr %tmp
}

; st3 of three <1 x double> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc doubles. The expected instruction is
; the three-register st1.1d form. The SelectionDAG run expects %inc scaled to
; bytes (lsl #3) and used as the post-index register; the GlobalISel run
; expects a separate add (with the shift folded in) and a plain st1.1d. %ptr
; is unused.
define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr)


; st4 of four <16 x i8> vectors to %A followed by returning %A advanced by
; 64 bytes (the total size stored). The SelectionDAG run expects the increment
; folded into a post-indexed st4.16b with immediate #64; the GlobalISel run
; expects a separate add and a plain st4.16b through a copy of the original
; pointer. %ptr is unused.
define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 64
  ret ptr %tmp
}

; st4 of four <16 x i8> vectors to %A with a register post-increment: the
; function returns %A advanced by %inc bytes. Because the element is one byte,
; the SelectionDAG run expects %inc (x2) used directly, unscaled, as the
; post-index register of st4.16b; the GlobalISel run expects a separate add and
; a plain st4.16b. %ptr is unused.
define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr)


; st4 of four <8 x i8> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
  ; 32 x i8 = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr i8, ptr %A, i32 32
  ret ptr %tmp
}

; st4 of four <8 x i8> vectors to %A, returning %A advanced by %inc bytes.
; SelectionDAG is expected to fold the increment into a register post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
  ; i8 elements: %inc is already a byte count, so no scaling appears in the expected output.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr)


; st4 of four <8 x i16> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
  ; 32 x i16 = 64 bytes (4 vectors x 16 bytes), matching the #64 in the expected output.
  %tmp = getelementptr i16, ptr %A, i32 32
  ret ptr %tmp
}

; st4 of four <8 x i16> vectors to %A, returning %A advanced by %inc i16
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
  ; i16 elements: byte offset is %inc * 2, hence the "lsl #1" in the expected output.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr)


; st4 of four <4 x i16> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
  ; 16 x i16 = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr i16, ptr %A, i32 16
  ret ptr %tmp
}

; st4 of four <4 x i16> vectors to %A, returning %A advanced by %inc i16
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
  ; i16 elements: byte offset is %inc * 2, hence the "lsl #1" in the expected output.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  ptr)


; st4 of four <4 x i32> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
  ; 16 x i32 = 64 bytes (4 vectors x 16 bytes), matching the #64 in the expected output.
  %tmp = getelementptr i32, ptr %A, i32 16
  ret ptr %tmp
}

; st4 of four <4 x i32> vectors to %A, returning %A advanced by %inc i32
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
  ; i32 elements: byte offset is %inc * 4, hence the "lsl #2" in the expected output.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  ptr)


; st4 of four <2 x i32> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
  ; 8 x i32 = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr i32, ptr %A, i32 8
  ret ptr %tmp
}

; st4 of four <2 x i32> vectors to %A, returning %A advanced by %inc i32
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
  ; i32 elements: byte offset is %inc * 4, hence the "lsl #2" in the expected output.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr)


; st4 of four <2 x i64> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
  ; 8 x i64 = 64 bytes (4 vectors x 16 bytes), matching the #64 in the expected output.
  %tmp = getelementptr i64, ptr %A, i64 8
  ret ptr %tmp
}

; st4 of four <2 x i64> vectors to %A, returning %A advanced by %inc i64
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
  ; i64 elements: byte offset is %inc * 8, hence the "lsl #3" in the expected output.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  ptr)


; st4 intrinsic on four <1 x i64> vectors to %A, returning %A advanced by a
; constant. Note that both runs expect the four-register st1.1d mnemonic here,
; not st4. SelectionDAG is expected to use the immediate post-indexed form;
; GlobalISel is expected to emit a separate add plus a plain store.
; %ptr is unused in the body.
define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
  ; 4 x i64 = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; st4 intrinsic on four <1 x i64> vectors to %A, returning %A advanced by %inc
; i64 elements. Note that both runs expect the four-register st1.1d mnemonic
; here, not st4. SelectionDAG is expected to scale the increment with an lsl
; and use the register post-indexed form; GlobalISel is expected to emit a
; separate shifted add plus a plain store. %ptr is unused in the body.
define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
  ; i64 elements: byte offset is %inc * 8, hence the "lsl #3" in the expected output.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  ptr)


; st4 of four <4 x float> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
  ; 16 x float = 64 bytes (4 vectors x 16 bytes), matching the #64 in the expected output.
  %tmp = getelementptr float, ptr %A, i32 16
  ret ptr %tmp
}

; st4 of four <4 x float> vectors to %A, returning %A advanced by %inc float
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
  ; float elements: byte offset is %inc * 4, hence the "lsl #2" in the expected output.
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr)


; st4 of four <2 x float> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
  ; 8 x float = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr float, ptr %A, i32 8
  ret ptr %tmp
}

; st4 of four <2 x float> vectors to %A, returning %A advanced by %inc float
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
  ; float elements: byte offset is %inc * 4, hence the "lsl #2" in the expected output.
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr)


; st4 of four <2 x double> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; st4; GlobalISel is expected to emit a separate add plus a plain st4.
; %ptr is unused in the body.
define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
  ; 8 x double = 64 bytes (4 vectors x 16 bytes), matching the #64 in the expected output.
  %tmp = getelementptr double, ptr %A, i64 8
  ret ptr %tmp
}

; st4 of four <2 x double> vectors to %A, returning %A advanced by %inc double
; elements. SelectionDAG is expected to scale the increment with an lsl and
; use a register post-indexed st4; GlobalISel is expected to emit a separate
; shifted add plus a plain st4. %ptr is unused in the body.
define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
  ; double elements: byte offset is %inc * 8, hence the "lsl #3" in the expected output.
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  ptr)


; st4 intrinsic on four <1 x double> vectors to %A, returning %A advanced by a
; constant. Note that both runs expect the four-register st1.1d mnemonic here,
; not st4. SelectionDAG is expected to use the immediate post-indexed form;
; GlobalISel is expected to emit a separate add plus a plain store.
; %ptr is unused in the body.
define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
  ; 4 x double = 32 bytes (4 vectors x 8 bytes), matching the #32 in the expected output.
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; st4 intrinsic on four <1 x double> vectors to %A, returning %A advanced by
; %inc double elements. Note that both runs expect the four-register st1.1d
; mnemonic here, not st4. SelectionDAG is expected to scale the increment with
; an lsl and use the register post-indexed form; GlobalISel is expected to emit
; a separate shifted add plus a plain store. %ptr is unused in the body.
define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
  ; double elements: byte offset is %inc * 8, hence the "lsl #3" in the expected output.
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr)


; st1x2 of two <16 x i8> vectors to %A, returning %A advanced by a constant.
; SelectionDAG is expected to fold the increment into an immediate post-indexed
; two-register st1; GlobalISel is expected to emit a separate add plus a plain
; st1. %ptr is unused in the body.
define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
  ; 32 x i8 = 32 bytes (2 vectors x 16 bytes), matching the #32 in the expected output.
  %tmp = getelementptr i8, ptr %A, i32 32
  ret ptr %tmp
}

; st1x2 of two <16 x i8> vectors to %A, returning %A advanced by %inc bytes.
; SelectionDAG is expected to fold the increment into a register post-indexed
; two-register st1; GlobalISel is expected to emit a separate add plus a plain
; st1. %ptr is unused in the body.
define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
  ; i8 elements: %inc is already a byte count, so no scaling appears in the expected output.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)


; Immediate post-increment test for a two-register st1 of <8 x i8>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 16 i8 elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.8b ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 16
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <8 x i8>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i8 elements (no scaling needed for byte elements).
; The default (SelectionDAG) run expects %inc (x2) used directly as the
; post-index register of st1; the -global-isel run expects a separate add
; and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)


; Immediate post-increment test for a two-register st1 of <8 x i16>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 16 i16 elements (32 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.8h ..., [x0], #32"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <8 x i16>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i16 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #1" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)


; Immediate post-increment test for a two-register st1 of <4 x i16>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 8 i16 elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.4h ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 8
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <4 x i16>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i16 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #1" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)


; Immediate post-increment test for a two-register st1 of <4 x i32>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 8 i32 elements (32 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.4s ..., [x0], #32"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <4 x i32>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i32 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #2" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)


; Immediate post-increment test for a two-register st1 of <2 x i32>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 4 i32 elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.2s ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <2 x i32>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i32 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #2" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)


; Immediate post-increment test for a two-register st1 of <2 x i64>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 4 i64 elements (32 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.2d ..., [x0], #32"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <2 x i64>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i64 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #3" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)


; Immediate post-increment test for a two-register st1 of <1 x i64>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 2 i64 elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.1d ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 2
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <1 x i64>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc i64 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #3" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)


; Immediate post-increment test for a two-register st1 of <4 x float>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 8 float elements (32 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.4s ..., [x0], #32"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <4 x float>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc float elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #2" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr)


; Immediate post-increment test for a two-register st1 of <2 x float>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 4 float elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.2s ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <2 x float>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc float elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #2" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr)


; Immediate post-increment test for a two-register st1 of <2 x double>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 4 double elements (32 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.2d ..., [x0], #32"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <2 x double>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc double elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #3" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr)


; Immediate post-increment test for a two-register st1 of <1 x double>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by 2 double elements (16 bytes, the combined size of both vectors).
; The default (SelectionDAG) run expects the increment folded into a
; post-indexed "st1.1d ..., [x0], #16"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 2
  ret ptr %tmp
}

; Register post-increment test for a two-register st1 of <1 x double>.
; Stores %B and %C through %A with the st1x2 intrinsic, then returns %A
; advanced by %inc double elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #3" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr)


; Immediate post-increment test for a three-register st1 of <16 x i8>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by 48 i8 elements (48 bytes, the combined size of all three
; vectors). The default (SelectionDAG) run expects the increment folded into
; a post-indexed "st1.16b ..., [x0], #48"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 48
  ret ptr %tmp
}

; Register post-increment test for a three-register st1 of <16 x i8>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by %inc i8 elements (no scaling needed for byte elements).
; The default (SelectionDAG) run expects %inc (x2) used directly as the
; post-index register of st1; the -global-isel run expects a separate add
; and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr)


; Immediate post-increment test for a three-register st1 of <8 x i8>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by 24 i8 elements (24 bytes, the combined size of all three
; vectors). The default (SelectionDAG) run expects the increment folded into
; a post-indexed "st1.8b ..., [x0], #24"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 24
  ret ptr %tmp
}

; Register post-increment test for a three-register st1 of <8 x i8>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by %inc i8 elements (no scaling needed for byte elements).
; The default (SelectionDAG) run expects %inc (x2) used directly as the
; post-index register of st1; the -global-isel run expects a separate add
; and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)


; Immediate post-increment test for a three-register st1 of <8 x i16>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by 24 i16 elements (48 bytes, the combined size of all three
; vectors). The default (SelectionDAG) run expects the increment folded into
; a post-indexed "st1.8h ..., [x0], #48"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 24
  ret ptr %tmp
}

; Register post-increment test for a three-register st1 of <8 x i16>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by %inc i16 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #1" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr)


; Immediate post-increment test for a three-register st1 of <4 x i16>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by 12 i16 elements (24 bytes, the combined size of all three
; vectors). The default (SelectionDAG) run expects the increment folded into
; a post-indexed "st1.4h ..., [x0], #24"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 12
  ret ptr %tmp
}

; Register post-increment test for a three-register st1 of <4 x i16>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by %inc i16 elements.
; The default (SelectionDAG) run expects %inc (x2) scaled with "lsl #1" into
; x8 and used as the post-index register of st1; the -global-isel run expects
; a separate add with a shifted operand and a non-writeback st1.
; %ptr is intentionally unused.
define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr)


; Immediate post-increment test for a three-register st1 of <4 x i32>.
; Stores %B, %C and %D through %A with the st1x3 intrinsic, then returns %A
; advanced by 12 i32 elements (48 bytes, the combined size of all three
; vectors). The default (SelectionDAG) run expects the increment folded into
; a post-indexed "st1.4s ..., [x0], #48"; the -global-isel run expects a
; separate add and a non-writeback st1. %ptr is intentionally unused.
define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 12
  ret ptr %tmp
}

; st1x3 of three <4 x i32> vectors to %A, returning %A advanced by %inc i32
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #2 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 4.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr)


; st1x3 of three <2 x i32> vectors to %A, returning %A advanced by 6 x i32
; (24 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #24.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i32, ptr %A, i32 6
  ret ptr %tmp
}

; st1x3 of three <2 x i32> vectors to %A, returning %A advanced by %inc i32
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #2 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 4.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr)


; st1x3 of three <2 x i64> vectors to %A, returning %A advanced by 6 x i64
; (48 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #48.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i64, ptr %A, i64 6
  ret ptr %tmp
}

; st1x3 of three <2 x i64> vectors to %A, returning %A advanced by %inc i64
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #3 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 8.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr)


; st1x3 of three <1 x i64> vectors to %A, returning %A advanced by 3 x i64
; (24 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #24.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i64, ptr %A, i64 3
  ret ptr %tmp
}

; st1x3 of three <1 x i64> vectors to %A, returning %A advanced by %inc i64
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #3 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 8.
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr)


; st1x3 of three <4 x float> vectors to %A, returning %A advanced by
; 12 x float (48 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #48.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr float, ptr %A, i32 12
  ret ptr %tmp
}

; st1x3 of three <4 x float> vectors to %A, returning %A advanced by %inc
; float elements (a run-time amount).
; Default run: expects %inc scaled with lsl #2 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 4.
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr)


; st1x3 of three <2 x float> vectors to %A, returning %A advanced by
; 6 x float (24 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #24.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr float, ptr %A, i32 6
  ret ptr %tmp
}

; st1x3 of three <2 x float> vectors to %A, returning %A advanced by %inc
; float elements (a run-time amount).
; Default run: expects %inc scaled with lsl #2 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 4.
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr)


; st1x3 of three <2 x double> vectors to %A, returning %A advanced by
; 6 x double (48 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #48.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #48
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr double, ptr %A, i64 6
  ret ptr %tmp
}

; st1x3 of three <2 x double> vectors to %A, returning %A advanced by %inc
; double elements (a run-time amount).
; Default run: expects %inc scaled with lsl #3 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 8.
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr)


; st1x3 of three <1 x double> vectors to %A, returning %A advanced by
; 3 x double (24 bytes, the total size of the three stored vectors).
; Default run: expects one post-indexed st1 with immediate #24.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr double, ptr %A, i64 3
  ret ptr %tmp
}

; st1x3 of three <1 x double> vectors to %A, returning %A advanced by %inc
; double elements (a run-time amount).
; Default run: expects %inc scaled with lsl #3 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 8.
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr)


; st1x4 of four <16 x i8> vectors to %A, returning %A advanced by 64 x i8
; (64 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #64.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i8, ptr %A, i32 64
  ret ptr %tmp
}

; st1x4 of four <16 x i8> vectors to %A, returning %A advanced by %inc bytes
; (a run-time amount; i8 elements, so no scaling is needed).
; Default run: expects x2 used directly as the post-index register.
; -global-isel=1 run: expects a separate unshifted add and a plain st1
; through a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
  ; i8-typed GEP: the byte offset equals %inc.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr)


; st1x4 of four <8 x i8> vectors to %A, returning %A advanced by 32 x i8
; (32 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #32.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i8, ptr %A, i32 32
  ret ptr %tmp
}

; st1x4 of four <8 x i8> vectors to %A, returning %A advanced by %inc bytes
; (a run-time amount; i8 elements, so no scaling is needed).
; Default run: expects x2 used directly as the post-index register.
; -global-isel=1 run: expects a separate unshifted add and a plain st1
; through a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
  ; i8-typed GEP: the byte offset equals %inc.
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr)


; st1x4 of four <8 x i16> vectors to %A, returning %A advanced by 32 x i16
; (64 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #64.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i16, ptr %A, i32 32
  ret ptr %tmp
}

; st1x4 of four <8 x i16> vectors to %A, returning %A advanced by %inc i16
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #1 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 2.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr)


; st1x4 of four <4 x i16> vectors to %A, returning %A advanced by 16 x i16
; (32 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #32.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i16, ptr %A, i32 16
  ret ptr %tmp
}

; st1x4 of four <4 x i16> vectors to %A, returning %A advanced by %inc i16
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #1 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 2.
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <4 x i16>: four vector operands followed by the
; destination pointer.
declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr)


; st1x4 of four <4 x i32> vectors to %A, returning %A advanced by 16 x i32
; (64 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #64.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i32, ptr %A, i32 16
  ret ptr %tmp
}

; st1x4 of four <4 x i32> vectors to %A, returning %A advanced by %inc i32
; elements (a run-time amount).
; Default run: expects %inc scaled with lsl #2 into x8, then used as the
; post-index register of the st1.
; -global-isel=1 run: expects a separate shifted add and a plain st1 through
; a copy of the base pointer. %ptr is not referenced in the body.
define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
  ; Element-typed GEP: the byte offset is %inc * 4.
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <4 x i32>: four vector operands followed by the
; destination pointer.
declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr)


; st1x4 of four <2 x i32> vectors to %A, returning %A advanced by 8 x i32
; (32 bytes, the total size of the four stored vectors).
; Default run: expects one post-indexed st1 with immediate #32.
; -global-isel=1 run: expects a separate add and a plain st1 through a copy
; of the base pointer. %ptr is not referenced in the body.
define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
  ; The increment is computed from the same base the store used.
  %tmp = getelementptr i32, ptr %A, i32 8
  ret ptr %tmp
}

; st1x4 of four <2 x i32> vectors to %A, returning %A advanced by %inc i32
; elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #2 and folded
; into a post-indexed st1.2s.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.2s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <2 x i32>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr)


; st1x4 of four <2 x i64> vectors to %A, returning %A advanced by 8 i64
; elements (64 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.2d with #64.
; GlobalISel expectations: a separate add #64 produces the returned pointer
; and the st1.2d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 8
  ret ptr %tmp
}

; st1x4 of four <2 x i64> vectors to %A, returning %A advanced by %inc i64
; elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #3 and folded
; into a post-indexed st1.2d.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.2d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <2 x i64>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  ptr)


; st1x4 of four <1 x i64> vectors to %A, returning %A advanced by 4 i64
; elements (32 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.1d with #32.
; GlobalISel expectations: a separate add #32 produces the returned pointer
; and the st1.1d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; st1x4 of four <1 x i64> vectors to %A, returning %A advanced by %inc i64
; elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #3 and folded
; into a post-indexed st1.1d.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.1d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <1 x i64>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  ptr)


; st1x4 of four <4 x float> vectors to %A, returning %A advanced by 16 float
; elements (64 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.4s with #64.
; GlobalISel expectations: a separate add #64 produces the returned pointer
; and the st1.4s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 16
  ret ptr %tmp
}

; st1x4 of four <4 x float> vectors to %A, returning %A advanced by %inc
; float elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #2 and folded
; into a post-indexed st1.4s.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.4s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <4 x float>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr)


; st1x4 of four <2 x float> vectors to %A, returning %A advanced by 8 float
; elements (32 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.2s with #32.
; GlobalISel expectations: a separate add #32 produces the returned pointer
; and the st1.2s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  ret ptr %tmp
}

; st1x4 of four <2 x float> vectors to %A, returning %A advanced by %inc
; float elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #2 and folded
; into a post-indexed st1.2s.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.2s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <2 x float>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr)


; st1x4 of four <2 x double> vectors to %A, returning %A advanced by 8 double
; elements (64 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.2d with #64.
; GlobalISel expectations: a separate add #64 produces the returned pointer
; and the st1.2d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #64
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 8
  ret ptr %tmp
}

; st1x4 of four <2 x double> vectors to %A, returning %A advanced by %inc
; double elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #3 and folded
; into a post-indexed st1.2d.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.2d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <2 x double>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  ptr)


; st1x4 of four <1 x double> vectors to %A, returning %A advanced by 4 double
; elements (32 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st1.1d with #32.
; GlobalISel expectations: a separate add #32 produces the returned pointer
; and the st1.1d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; st1x4 of four <1 x double> vectors to %A, returning %A advanced by %inc
; double elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #3 and folded
; into a post-indexed st1.1d.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st1.1d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

; st1x4 intrinsic for <1 x double>: four vector operands followed by the destination pointer.
declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr)

; st2lane of lane 0 from two <16 x i8> vectors to %A, returning %A advanced
; by 2 i8 elements (2 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.b with #2.
; GlobalISel expectations: a separate add #2 produces the returned pointer
; and the st2.b is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <16 x i8> vectors to %A, returning %A advanced
; by %inc i8 elements (register post-increment form).
; SelectionDAG expectations: a post-indexed st2.b using x2 directly, with no
; scaling shift.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.b is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <16 x i8>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr)


; st2lane of lane 0 from two <8 x i8> vectors to %A, returning %A advanced
; by 2 i8 elements (2 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.b with #2.
; GlobalISel expectations: a separate add #2 produces the returned pointer
; and the st2.b is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <8 x i8> vectors to %A, returning %A advanced
; by %inc i8 elements (register post-increment form).
; SelectionDAG expectations: a post-indexed st2.b using x2 directly, with no
; scaling shift.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.b is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <8 x i8>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr)


; st2lane of lane 0 from two <8 x i16> vectors to %A, returning %A advanced
; by 2 i16 elements (4 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.h with #4.
; GlobalISel expectations: a separate add #4 produces the returned pointer
; and the st2.h is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #4
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <8 x i16> vectors to %A, returning %A advanced
; by %inc i16 elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #1 and folded
; into a post-indexed st2.h.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.h is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <8 x i16>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr)


; st2lane of lane 0 from two <4 x i16> vectors to %A, returning %A advanced
; by 2 i16 elements (4 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.h with #4.
; GlobalISel expectations: a separate add #4 produces the returned pointer
; and the st2.h is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #4
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <4 x i16> vectors to %A, returning %A advanced
; by %inc i16 elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #1 and folded
; into a post-indexed st2.h.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.h is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <4 x i16>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr)


; st2lane of lane 0 from two <4 x i32> vectors to %A, returning %A advanced
; by 2 i32 elements (8 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.s with #8.
; GlobalISel expectations: a separate add #8 produces the returned pointer
; and the st2.s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <4 x i32> vectors to %A, returning %A advanced
; by %inc i32 elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #2 and folded
; into a post-indexed st2.s.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <4 x i32>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)


; st2lane of lane 0 from two <2 x i32> vectors to %A, returning %A advanced
; by 2 i32 elements (8 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.s with #8.
; GlobalISel expectations: a separate add #8 produces the returned pointer
; and the st2.s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 2
  ret ptr %tmp
}

; st2lane of lane 0 from two <2 x i32> vectors to %A, returning %A advanced
; by %inc i32 elements (register post-increment form).
; SelectionDAG expectations: the increment is scaled with lsl #2 and folded
; into a post-indexed st2.s.
; GlobalISel expectations: a separate add produces the returned pointer and
; the st2.s is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

; st2lane intrinsic for <2 x i32>: two vector operands, an i64 lane index, then the destination pointer.
declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr)


; st2lane of lane 0 from two <2 x i64> vectors to %A, returning %A advanced
; by 2 i64 elements (16 bytes; immediate post-increment form).
; SelectionDAG expectations: a post-indexed st2.d with #16.
; GlobalISel expectations: a separate add #16 produces the returned pointer
; and the st2.d is not post-indexed.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <2 x i64> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc i64 elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)


; Single-lane st2 post-increment test, immediate form, <1 x i64> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by 2 i64 elements (16 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#16). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <1 x i64> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc i64 elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr)


; Single-lane st2 post-increment test, immediate form, <4 x float> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by 2 float elements (8 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#8). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <4 x float> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc float elements.
; The default llc run is expected to scale %inc to bytes (lsl #2) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, ptr)


; Single-lane st2 post-increment test, immediate form, <2 x float> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by 2 float elements (8 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#8). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <2 x float> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc float elements.
; The default llc run is expected to scale %inc to bytes (lsl #2) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, ptr)


; Single-lane st2 post-increment test, immediate form, <2 x double> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by 2 double elements (16 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#16). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <2 x double> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc double elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64, ptr)


; Single-lane st2 post-increment test, immediate form, <1 x double> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by 2 double elements (16 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#16). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 2
  ret ptr %tmp
}

; Single-lane st2 post-increment test, register form, <1 x double> operands.
; Stores lane 0 of %B and %C to %A with the st2lane intrinsic and returns %A
; advanced by %inc double elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st2lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <16 x i8> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i8 elements (3 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#3). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <16 x i8> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i8 elements.
; The default llc run is expected to use x2 directly as the post-index register
; of the store, with no scaling shift. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate add, and store
; through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <8 x i8> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i8 elements (3 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#3). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <8 x i8> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i8 elements.
; The default llc run is expected to use x2 directly as the post-index register
; of the store, with no scaling shift. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate add, and store
; through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <8 x i16> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i16 elements (6 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#6). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #6
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <8 x i16> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i16 elements.
; The default llc run is expected to scale %inc to bytes (lsl #1) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <4 x i16> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i16 elements (6 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#6). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #6
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <4 x i16> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i16 elements.
; The default llc run is expected to scale %inc to bytes (lsl #1) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <4 x i32> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i32 elements (12 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#12). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #12
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <4 x i32> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i32 elements.
; The default llc run is expected to scale %inc to bytes (lsl #2) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <2 x i32> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i32 elements (12 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#12). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #12
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <2 x i32> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i32 elements.
; The default llc run is expected to scale %inc to bytes (lsl #2) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <2 x i64> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i64 elements (24 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#24). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <2 x i64> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i64 elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <1 x i64> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 i64 elements (24 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#24). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <1 x i64> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc i64 elements.
; The default llc run is expected to scale %inc to bytes (lsl #3) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, i64, ptr)


; Single-lane st3 post-increment test, immediate form, <4 x float> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by 3 float elements (12 bytes).
; The default llc run is expected to fold the increment into the store as a
; post-index immediate (#12). The -global-isel=1 run is expected to copy the
; base to x8, bump x0 with a separate add, and store through [x8] without
; writeback.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #12
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  ret ptr %tmp
}

; Single-lane st3 post-increment test, register form, <4 x float> operands.
; Stores lane 0 of %B, %C and %D to %A with the st3lane intrinsic and returns
; %A advanced by %inc float elements.
; The default llc run is expected to scale %inc to bytes (lsl #2) and use it as
; the post-index register of the store. The -global-isel=1 run is expected to
; copy the base to x8, compute the new x0 with a separate shifted add, and
; store through [x8] without writeback.
; %ptr is not referenced by the body.
define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, i64, ptr)


; Stores lane 0 of %B, %C and %D (<2 x float>) to %A with st3lane and returns
; %A advanced by 3 floats. The SelectionDAG checks expect the 12-byte bump to
; fold into a post-indexed st3.s; the GlobalISel checks expect a separate add
; and a plain st3.s through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #12
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 3
  ret ptr %tmp
}

; Stores lane 0 of %B, %C and %D (<2 x float>) to %A with st3lane and returns
; %A advanced by %inc floats. The SelectionDAG checks expect x2 shifted left by
; 2 into x8 and used as the register post-index of st3.s; the GlobalISel checks
; expect a separate shifted add and a plain st3.s through a copy of the
; original pointer. %ptr is not referenced by the body.
define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, i64, ptr)


; Stores lane 0 of %B, %C and %D (<2 x double>) to %A with st3lane and returns
; %A advanced by 3 doubles. The SelectionDAG checks expect the 24-byte bump to
; fold into a post-indexed st3.d; the GlobalISel checks expect a separate add
; and a plain st3.d through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 3
  ret ptr %tmp
}

; Stores lane 0 of %B, %C and %D (<2 x double>) to %A with st3lane and returns
; %A advanced by %inc doubles. The SelectionDAG checks expect x2 shifted left
; by 3 into x8 and used as the register post-index of st3.d; the GlobalISel
; checks expect a separate shifted add and a plain st3.d through a copy of the
; original pointer. %ptr is not referenced by the body.
define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, i64, ptr)


; Stores lane 0 of %B, %C and %D (<1 x double>) to %A with st3lane and returns
; %A advanced by 3 doubles. The SelectionDAG checks expect the 24-byte bump to
; fold into a post-indexed st3.d; the GlobalISel checks expect a separate add
; and a plain st3.d through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #24
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 3
  ret ptr %tmp
}

; Stores lane 0 of %B, %C and %D (<1 x double>) to %A with st3lane and returns
; %A advanced by %inc doubles. The SelectionDAG checks expect x2 shifted left
; by 3 into x8 and used as the register post-index of st3.d; the GlobalISel
; checks expect a separate shifted add and a plain st3.d through a copy of the
; original pointer. %ptr is not referenced by the body.
define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<16 x i8>) to %A with st4lane and returns
; %A advanced by 4 bytes. The SelectionDAG checks expect the 4-byte bump to
; fold into a post-indexed st4.b; the GlobalISel checks expect a separate add
; and a plain st4.b through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #4
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<16 x i8>) to %A with st4lane and returns
; %A advanced by %inc bytes. The SelectionDAG checks expect x2 to be used
; unscaled as the register post-index of st4.b; the GlobalISel checks expect a
; separate add and a plain st4.b through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<8 x i8>) to %A with st4lane and returns
; %A advanced by 4 bytes. The SelectionDAG checks expect the 4-byte bump to
; fold into a post-indexed st4.b; the GlobalISel checks expect a separate add
; and a plain st4.b through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #4
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<8 x i8>) to %A with st4lane and returns
; %A advanced by %inc bytes. The SelectionDAG checks expect x2 to be used
; unscaled as the register post-index of st4.b; the GlobalISel checks expect a
; separate add and a plain st4.b through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<8 x i16>) to %A with st4lane and returns
; %A advanced by 4 i16 elements. The SelectionDAG checks expect the 8-byte bump
; to fold into a post-indexed st4.h; the GlobalISel checks expect a separate
; add and a plain st4.h through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<8 x i16>) to %A with st4lane and returns
; %A advanced by %inc i16 elements. The SelectionDAG checks expect x2 shifted
; left by 1 into x8 and used as the register post-index of st4.h; the
; GlobalISel checks expect a separate shifted add and a plain st4.h through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<4 x i16>) to %A with st4lane and returns
; %A advanced by 4 i16 elements. The SelectionDAG checks expect the 8-byte bump
; to fold into a post-indexed st4.h; the GlobalISel checks expect a separate
; add and a plain st4.h through a copy of the original pointer. %ptr is not
; referenced by the body.
define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #8
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<4 x i16>) to %A with st4lane and returns
; %A advanced by %inc i16 elements. The SelectionDAG checks expect x2 shifted
; left by 1 into x8 and used as the register post-index of st4.h; the
; GlobalISel checks expect a separate shifted add and a plain st4.h through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<4 x i32>) to %A with st4lane and returns
; %A advanced by 4 i32 elements. The SelectionDAG checks expect the 16-byte
; bump to fold into a post-indexed st4.s; the GlobalISel checks expect a
; separate add and a plain st4.s through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<4 x i32>) to %A with st4lane and returns
; %A advanced by %inc i32 elements. The SelectionDAG checks expect x2 shifted
; left by 2 into x8 and used as the register post-index of st4.s; the
; GlobalISel checks expect a separate shifted add and a plain st4.s through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<2 x i32>) to %A with st4lane and returns
; %A advanced by 4 i32 elements. The SelectionDAG checks expect the 16-byte
; bump to fold into a post-indexed st4.s; the GlobalISel checks expect a
; separate add and a plain st4.s through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<2 x i32>) to %A with st4lane and returns
; %A advanced by %inc i32 elements. The SelectionDAG checks expect x2 shifted
; left by 2 into x8 and used as the register post-index of st4.s; the
; GlobalISel checks expect a separate shifted add and a plain st4.s through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<2 x i64>) to %A with st4lane and returns
; %A advanced by 4 i64 elements. The SelectionDAG checks expect the 32-byte
; bump to fold into a post-indexed st4.d; the GlobalISel checks expect a
; separate add and a plain st4.d through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<2 x i64>) to %A with st4lane and returns
; %A advanced by %inc i64 elements. The SelectionDAG checks expect x2 shifted
; left by 3 into x8 and used as the register post-index of st4.d; the
; GlobalISel checks expect a separate shifted add and a plain st4.d through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<1 x i64>) to %A with st4lane and returns
; %A advanced by 4 i64 elements. The SelectionDAG checks expect the 32-byte
; bump to fold into a post-indexed st4.d; the GlobalISel checks expect a
; separate add and a plain st4.d through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<1 x i64>) to %A with st4lane and returns
; %A advanced by %inc i64 elements. The SelectionDAG checks expect x2 shifted
; left by 3 into x8 and used as the register post-index of st4.d; the
; GlobalISel checks expect a separate shifted add and a plain st4.d through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, ptr)


; Stores lane 0 of %B, %C, %D and %E (<4 x float>) to %A with st4lane and
; returns %A advanced by 4 floats. The SelectionDAG checks expect the 16-byte
; bump to fold into a post-indexed st4.s; the GlobalISel checks expect a
; separate add and a plain st4.s through a copy of the original pointer. %ptr
; is not referenced by the body.
define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  ret ptr %tmp
}

; Stores lane 0 of %B, %C, %D and %E (<4 x float>) to %A with st4lane and
; returns %A advanced by %inc floats. The SelectionDAG checks expect x2 shifted
; left by 2 into x8 and used as the register post-index of st4.s; the
; GlobalISel checks expect a separate shifted add and a plain st4.s through a
; copy of the original pointer. %ptr is not referenced by the body.
define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, ptr)


; Single-lane ST4 of four <2 x float> vectors with an immediate post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by 4 floats (16 bytes). The SelectionDAG assertions expect the
; increment folded into the store's writeback form; the GlobalISel assertions
; expect a separate add and a plain store. %ptr is not used.
define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #16
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i32 4
  ret ptr %tmp
}

; Single-lane ST4 of four <2 x float> vectors with a register post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by %inc floats. The SelectionDAG assertions expect %inc scaled
; by 4 (lsl #2) and used as the store's post-index register; the GlobalISel
; assertions expect a separate shifted add and a plain store. %ptr is not used.
define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, ptr)


; Single-lane ST4 of four <2 x double> vectors with an immediate post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by 4 doubles (32 bytes). The SelectionDAG assertions expect the
; increment folded into the store's writeback form; the GlobalISel assertions
; expect a separate add and a plain store. %ptr is not used.
define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; Single-lane ST4 of four <2 x double> vectors with a register post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by %inc doubles. The SelectionDAG assertions expect %inc scaled
; by 8 (lsl #3) and used as the store's post-index register; the GlobalISel
; assertions expect a separate shifted add and a plain store. %ptr is not used.
define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, ptr)


; Single-lane ST4 of four <1 x double> vectors with an immediate post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by 4 doubles (32 bytes). The SelectionDAG assertions expect the
; increment folded into the store's writeback form; the GlobalISel assertions
; expect a separate add and a plain store. %ptr is not used.
define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, #32
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 4
  ret ptr %tmp
}

; Single-lane ST4 of four <1 x double> vectors with a register post-increment.
; Lane 0 of %B, %C, %D and %E is stored through %A, and the function returns
; %A advanced by %inc doubles. The SelectionDAG assertions expect %inc scaled
; by 8 (lsl #3) and used as the store's post-index register; the GlobalISel
; assertions expect a separate shifted add and a plain store. %ptr is not used.
define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    mov x8, x0
; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
; CHECK-GI-NEXT:    ret
  call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  ret ptr %tmp
}

declare void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, ptr)

; Load-and-replicate of an i8 into <16 x i8> with an immediate post-increment.
; One i8 is loaded from %bar and inserted into all 16 lanes; %bar + 1 byte is
; then stored to %ptr. The SelectionDAG assertions expect a single
; post-indexed ld1r; the GlobalISel assertions expect a post-indexed ldrb
; followed by a dup.
define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.16b { v0 }, [x0], #1
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0], #1
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.16b v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  %tmp18 = getelementptr i8, ptr %bar, i64 1
  store ptr %tmp18, ptr %ptr
  ret <16 x i8> %tmp17
}

; Load-and-replicate of an i8 into <16 x i8> with a register post-increment.
; One i8 is loaded from %bar and inserted into all 16 lanes; %bar + %inc bytes
; is then stored to %ptr. The SelectionDAG assertions expect a single ld1r
; post-indexed by x2; the GlobalISel assertions expect a plain ld1r and a
; separate add.
define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.16b { v0 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.16b { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  %tmp18 = getelementptr i8, ptr %bar, i64 %inc
  store ptr %tmp18, ptr %ptr
  ret <16 x i8> %tmp17
}

; Load-and-replicate of an i8 into <8 x i8> with an immediate post-increment.
; One i8 is loaded from %bar and inserted into all 8 lanes; %bar + 1 byte is
; then stored to %ptr. The SelectionDAG assertions expect a single
; post-indexed ld1r; the GlobalISel assertions expect a post-indexed ldrb
; followed by a dup.
define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x0], #1
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0], #1
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.8b v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = getelementptr i8, ptr %bar, i64 1
  store ptr %tmp10, ptr %ptr
  ret <8 x i8> %tmp9
}

; Load-and-replicate of an i8 into <8 x i8> with a register post-increment.
; One i8 is loaded from %bar and inserted into all 8 lanes; %bar + %inc bytes
; is then stored to %ptr. The SelectionDAG assertions expect a single ld1r
; post-indexed by x2; the GlobalISel assertions expect a plain ld1r and a
; separate add.
define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.8b { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = getelementptr i8, ptr %bar, i64 %inc
  store ptr %tmp10, ptr %ptr
  ret <8 x i8> %tmp9
}

; Load-and-replicate of an i16 into <8 x i16> with an immediate post-increment.
; One i16 is loaded from %bar and inserted into all 8 lanes; %bar advanced by
; one i16 (2 bytes) is then stored to %ptr. The SelectionDAG assertions expect
; a single post-indexed ld1r; the GlobalISel assertions expect a post-indexed
; ldrh followed by a dup.
define <8 x i16> @test_v8i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.8h { v0 }, [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0], #2
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.8h v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  %tmp10 = getelementptr i16, ptr %bar, i64 1
  store ptr %tmp10, ptr %ptr
  ret <8 x i16> %tmp9
}

; Load-and-replicate of an i16 into <8 x i16> with a register post-increment.
; One i16 is loaded from %bar and inserted into all 8 lanes; %bar advanced by
; %inc i16 elements is then stored to %ptr. The SelectionDAG assertions expect
; %inc scaled by 2 (lsl #1) and used as the ld1r post-index register; the
; GlobalISel assertions expect a plain ld1r and a separate shifted add.
define <8 x i16> @test_v8i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1r.8h { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.8h { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  %tmp10 = getelementptr i16, ptr %bar, i64 %inc
  store ptr %tmp10, ptr %ptr
  ret <8 x i16> %tmp9
}

; Load-and-replicate of an i16 into <4 x i16> with an immediate post-increment.
; One i16 is loaded from %bar and inserted into all 4 lanes; %bar advanced by
; one i16 (2 bytes) is then stored to %ptr. The SelectionDAG assertions expect
; a single post-indexed ld1r; the GlobalISel assertions expect a post-indexed
; ldrh followed by a dup.
define <4 x i16> @test_v4i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.4h { v0 }, [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0], #2
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.4h v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = getelementptr i16, ptr %bar, i64 1
  store ptr %tmp6, ptr %ptr
  ret <4 x i16> %tmp5
}

; Load-and-replicate of an i16 into <4 x i16> with a register post-increment.
; One i16 is loaded from %bar and inserted into all 4 lanes; %bar advanced by
; %inc i16 elements is then stored to %ptr. The SelectionDAG assertions expect
; %inc scaled by 2 (lsl #1) and used as the ld1r post-index register; the
; GlobalISel assertions expect a plain ld1r and a separate shifted add.
define <4 x i16> @test_v4i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1r.4h { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.4h { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = getelementptr i16, ptr %bar, i64 %inc
  store ptr %tmp6, ptr %ptr
  ret <4 x i16> %tmp5
}

; Load-and-replicate of an i32 into <4 x i32> with an immediate post-increment.
; One i32 is loaded from %bar and inserted into all 4 lanes; %bar advanced by
; one i32 (4 bytes) is then stored to %ptr. The SelectionDAG assertions expect
; a single post-indexed ld1r; the GlobalISel assertions expect a post-indexed
; ldr followed by a dup.
define <4 x i32> @test_v4i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.4s v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  %tmp6 = getelementptr i32, ptr %bar, i64 1
  store ptr %tmp6, ptr %ptr
  ret <4 x i32> %tmp5
}

; Load-and-replicate of an i32 into <4 x i32> with a register post-increment.
; One i32 is loaded from %bar and inserted into all 4 lanes; %bar advanced by
; %inc i32 elements is then stored to %ptr. The SelectionDAG assertions expect
; %inc scaled by 4 (lsl #2) and used as the ld1r post-index register; the
; GlobalISel assertions expect a plain ld1r and a separate shifted add.
define <4 x i32> @test_v4i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  %tmp6 = getelementptr i32, ptr %bar, i64 %inc
  store ptr %tmp6, ptr %ptr
  ret <4 x i32> %tmp5
}

; Load-and-replicate of an i32 into <2 x i32> with an immediate post-increment.
; One i32 is loaded from %bar and inserted into both lanes; %bar advanced by
; one i32 (4 bytes) is then stored to %ptr. The SelectionDAG assertions expect
; a single post-indexed ld1r; the GlobalISel assertions expect a post-indexed
; ldr followed by a dup.
define <2 x i32> @test_v2i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.2s v0, w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = getelementptr i32, ptr %bar, i64 1
  store ptr %tmp4, ptr %ptr
  ret <2 x i32> %tmp3
}

; Load-and-replicate of an i32 into <2 x i32> with a register post-increment.
; One i32 is loaded from %bar and inserted into both lanes; %bar advanced by
; %inc i32 elements is then stored to %ptr. The SelectionDAG assertions expect
; %inc scaled by 4 (lsl #2) and used as the ld1r post-index register; the
; GlobalISel assertions expect a plain ld1r and a separate shifted add.
define <2 x i32> @test_v2i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = getelementptr i32, ptr %bar, i64 %inc
  store ptr %tmp4, ptr %ptr
  ret <2 x i32> %tmp3
}

; Load-and-replicate of an i64 into <2 x i64> with an immediate post-increment.
; One i64 is loaded from %bar and inserted into both lanes; %bar advanced by
; one i64 (8 bytes) is then stored to %ptr. The SelectionDAG assertions expect
; a single post-indexed ld1r; the GlobalISel assertions expect a post-indexed
; ldr followed by a dup.
define <2 x i64> @test_v2i64_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr x8, [x0], #8
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.2d v0, x8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  %tmp4 = getelementptr i64, ptr %bar, i64 1
  store ptr %tmp4, ptr %ptr
  ret <2 x i64> %tmp3
}

; Load-and-replicate of an i64 into <2 x i64> with a register post-increment.
; One i64 is loaded from %bar and inserted into both lanes; %bar advanced by
; %inc i64 elements is then stored to %ptr. The SelectionDAG assertions expect
; %inc scaled by 8 (lsl #3) and used as the ld1r post-index register; the
; GlobalISel assertions expect a plain ld1r and a separate shifted add.
define <2 x i64> @test_v2i64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  %tmp4 = getelementptr i64, ptr %bar, i64 %inc
  store ptr %tmp4, ptr %ptr
  ret <2 x i64> %tmp3
}

; Load-and-replicate of a float into <4 x float> with an immediate
; post-increment. One float is loaded from %bar and inserted into all 4 lanes;
; %bar advanced by one float (4 bytes) is then stored to %ptr. The
; SelectionDAG assertions expect a single post-indexed ld1r; the GlobalISel
; assertions expect a post-indexed scalar FP ldr followed by a lane dup.
define <4 x float> @test_v4f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr s0, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.4s v0, v0[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
  %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
  %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
  %tmp6 = getelementptr float, ptr %bar, i64 1
  store ptr %tmp6, ptr %ptr
  ret <4 x float> %tmp5
}

; Load-and-replicate of a float into <4 x float> with a register
; post-increment. One float is loaded from %bar and inserted into all 4 lanes;
; %bar advanced by %inc floats is then stored to %ptr. The SelectionDAG
; assertions expect %inc scaled by 4 (lsl #2) and used as the ld1r post-index
; register; the GlobalISel assertions expect a plain ld1r and a separate
; shifted add.
define <4 x float> @test_v4f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
  %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
  %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
  %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
  %tmp6 = getelementptr float, ptr %bar, i64 %inc
  store ptr %tmp6, ptr %ptr
  ret <4 x float> %tmp5
}

; Load-and-replicate of a float into <2 x float> with an immediate
; post-increment. One float is loaded from %bar and inserted into both lanes;
; %bar advanced by one float (4 bytes) is then stored to %ptr. The
; SelectionDAG assertions expect a single post-indexed ld1r; the GlobalISel
; assertions expect a post-indexed scalar FP ldr followed by a lane dup.
define <2 x float> @test_v2f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr s0, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.2s v0, v0[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
  %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
  %tmp4 = getelementptr float, ptr %bar, i64 1
  store ptr %tmp4, ptr %ptr
  ret <2 x float> %tmp3
}

; Load-and-replicate of a float into <2 x float> with a register
; post-increment. One float is loaded from %bar and inserted into both lanes;
; %bar advanced by %inc floats is then stored to %ptr. The SelectionDAG
; assertions expect %inc scaled by 4 (lsl #2) and used as the ld1r post-index
; register; the GlobalISel assertions expect a plain ld1r and a separate
; shifted add.
define <2 x float> @test_v2f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
  %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
  %tmp4 = getelementptr float, ptr %bar, i64 %inc
  store ptr %tmp4, ptr %ptr
  ret <2 x float> %tmp3
}

; Load-and-replicate of a double into <2 x double> with an immediate
; post-increment. One double is loaded from %bar and inserted into both lanes;
; %bar advanced by one double (8 bytes) is then stored to %ptr. The
; SelectionDAG assertions expect a single post-indexed ld1r; the GlobalISel
; assertions expect a post-indexed scalar FP ldr followed by a lane dup.
define <2 x double> @test_v2f64_post_imm_ld1r(ptr %bar, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr d0, [x0], #8
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    dup.2d v0, v0[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
  %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
  %tmp4 = getelementptr double, ptr %bar, i64 1
  store ptr %tmp4, ptr %ptr
  ret <2 x double> %tmp3
}

; Load-and-replicate of a double into <2 x double> with a register
; post-increment. One double is loaded from %bar and inserted into both lanes;
; %bar advanced by %inc doubles is then stored to %ptr. The SelectionDAG
; assertions expect %inc scaled by 8 (lsl #3) and used as the ld1r post-index
; register; the GlobalISel assertions expect a plain ld1r and a separate
; shifted add.
define <2 x double> @test_v2f64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld1r:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1r:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
  %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
  %tmp4 = getelementptr double, ptr %bar, i64 %inc
  store ptr %tmp4, ptr %ptr
  ret <2 x double> %tmp3
}

; Single-lane load of an i8 into <16 x i8> with an immediate post-increment.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar + 1 byte is
; then stored to %ptr. The SelectionDAG assertions expect a single
; post-indexed ld1 lane load; the GlobalISel assertions expect a post-indexed
; ldrb followed by a lane insert from a general-purpose register.
define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0], #1
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.b v0[1], w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
  %tmp3 = getelementptr i8, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <16 x i8> %tmp2
}

; Single-lane load of an i8 into <16 x i8> with a register post-increment.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar + %inc bytes
; is then stored to %ptr. The SelectionDAG assertions expect a single ld1 lane
; load post-indexed by x2; the GlobalISel assertions expect a scalar byte load
; into b1, a separate add, and a vector-to-vector lane move.
define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 x i8> %A) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr b1, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
  %tmp3 = getelementptr i8, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <16 x i8> %tmp2
}

; Single-lane load of an i8 into <8 x i8> with an immediate post-increment.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar + 1 byte is
; then stored to %ptr. The SelectionDAG assertions expect a single
; post-indexed ld1 lane load; the GlobalISel assertions expect a post-indexed
; ldrb followed by a lane insert from a general-purpose register.
define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrb w8, [x0], #1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.b v0[1], w8
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
  %tmp3 = getelementptr i8, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <8 x i8> %tmp2
}

; Single-lane load of an i8 into <8 x i8> with a register post-increment.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar + %inc bytes
; is then stored to %ptr. The SelectionDAG assertions expect a single ld1 lane
; load post-indexed by x2; the GlobalISel assertions expect a scalar byte load
; into b1, a separate add, and a vector-to-vector lane move.
define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i8> %A) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr b1, [x0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
  %tmp3 = getelementptr i8, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <8 x i8> %tmp2
}

; Loads an i16 from %bar into lane 1 of a 128-bit <8 x i16> and stores the
; address one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.h with #2;
; GlobalISel emits a post-indexed ldrh followed by a lane move.
define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0], #2
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.h v0[1], w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <8 x i16> %tmp2
}

; Loads an i16 from %bar into lane 1 of <8 x i16> and stores %bar advanced by
; %inc i16 elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #1 and uses it as the
; post-index register of ld1.h; GlobalISel uses a non-writeback ld1.h lane
; load plus a separate add with lsl #1.
define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i16> %A) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <8 x i16> %tmp2
}

; 64-bit vector variant: loads an i16 from %bar into lane 1 of <4 x i16> and
; stores the address one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.h with #2;
; GlobalISel emits a post-indexed ldrh followed by a lane move.
define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0], #2
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.h v0[1], w8
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <4 x i16> %tmp2
}

; 64-bit vector variant: loads an i16 from %bar into lane 1 of <4 x i16> and
; stores %bar advanced by %inc i16 elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #1 and uses a register
; post-indexed ld1.h; GlobalISel computes the new address with a separate add
; and uses a non-writeback ld1.h lane load.
define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <4 x i16> %tmp2
}

; Loads an i32 from %bar into lane 1 of <4 x i32> and stores the address one
; element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.s with #4;
; GlobalISel emits a post-indexed ldr (w register) followed by a lane move.
define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.s v0[1], w8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
  %tmp3 = getelementptr i32, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <4 x i32> %tmp2
}

; Loads an i32 from %bar into lane 1 of <4 x i32> and stores %bar advanced by
; %inc i32 elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #2 and uses a register
; post-indexed ld1.s; GlobalISel uses a non-writeback ld1.s lane load plus a
; separate add with lsl #2.
define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i32> %A) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
  %tmp3 = getelementptr i32, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <4 x i32> %tmp2
}

; 64-bit vector variant: loads an i32 from %bar into lane 1 of <2 x i32> and
; stores the address one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.s with #4;
; GlobalISel emits a post-indexed ldr (w register) followed by a lane move.
define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr w8, [x0], #4
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.s v0[1], w8
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
  %tmp3 = getelementptr i32, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <2 x i32> %tmp2
}

; 64-bit vector variant: loads an i32 from %bar into lane 1 of <2 x i32> and
; stores %bar advanced by %inc i32 elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #2 and uses a register
; post-indexed ld1.s; GlobalISel computes the new address with a separate add
; and uses a non-writeback ld1.s lane load.
define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i32> %A) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
  %tmp3 = getelementptr i32, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <2 x i32> %tmp2
}

; Loads an i64 from %bar into lane 1 of <2 x i64> and stores the address one
; element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.d with #8;
; GlobalISel emits a post-indexed ldr (x register) followed by a lane move.
define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr x8, [x0], #8
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.d v0[1], x8
; CHECK-GI-NEXT:    ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
  %tmp3 = getelementptr i64, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <2 x i64> %tmp2
}

; Loads an i64 from %bar into lane 1 of <2 x i64> and stores %bar advanced by
; %inc i64 elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #3 and uses a register
; post-indexed ld1.d; GlobalISel uses a non-writeback ld1.d lane load plus a
; separate add with lsl #3.
define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i64> %A) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
  %tmp3 = getelementptr i64, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <2 x i64> %tmp2
}

; Loads a float from %bar into lane 1 of <4 x float> and stores the address
; one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.s with #4;
; GlobalISel emits a post-indexed ldr into s1 followed by a vector-to-vector
; lane move.
define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float> %A) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr s1, [x0], #4
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <4 x float> %tmp2
}

; Loads a float from %bar into lane 1 of <4 x float> and stores %bar advanced
; by %inc float elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #2 and uses a register
; post-indexed ld1.s; GlobalISel uses a non-writeback ld1.s lane load plus a
; separate add with lsl #2.
define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x float> %A) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <4 x float> %tmp2
}

; 64-bit vector variant: loads a float from %bar into lane 1 of <2 x float>
; and stores the address one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.s with #4;
; GlobalISel emits a post-indexed ldr into s1 followed by a vector-to-vector
; lane move.
define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> %A) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr s1, [x0], #4
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <2 x float> %tmp2
}

; 64-bit vector variant: loads a float from %bar into lane 1 of <2 x float>
; and stores %bar advanced by %inc float elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #2 and uses a register
; post-indexed ld1.s; GlobalISel computes the new address with a separate add
; and uses a non-writeback ld1.s lane load.
define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x float> %A) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <2 x float> %tmp2
}

; Loads a double from %bar into lane 1 of <2 x double> and stores the address
; one element past %bar to %ptr.
; Expected output: SelectionDAG emits a post-indexed ld1.d with #8;
; GlobalISel emits a post-indexed ldr into d1 followed by a vector-to-vector
; lane move.
define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double> %A) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], #8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr d1, [x0], #8
; CHECK-GI-NEXT:    str x0, [x1]
; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
; CHECK-GI-NEXT:    ret
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
  %tmp3 = getelementptr double, ptr %bar, i64 1
  store ptr %tmp3, ptr %ptr
  ret <2 x double> %tmp2
}

; Loads a double from %bar into lane 1 of <2 x double> and stores %bar
; advanced by %inc double elements to %ptr.
; Expected output: SelectionDAG scales %inc with lsl #3 and uses a register
; post-indexed ld1.d; GlobalISel uses a non-writeback ld1.d lane load plus a
; separate add with lsl #3.
define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x double> %A) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
  %tmp3 = getelementptr double, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <2 x double> %tmp2
}

; Check for dependencies between the vector and the scalar load.
; The vector operand %A is itself loaded from memory, after a store that comes
; after the scalar load of %bar. The expected output for both instruction
; selectors keeps a plain scalar ldr, a separate add for the new address, and
; a lane move, i.e. no post-indexed ld1 lane load is formed.
define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(ptr %bar, ptr %ptr, i64 %inc, ptr %dep_ptr_1, ptr %dep_ptr_2, <4 x float> %vec) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    str q0, [x3]
; CHECK-NEXT:    add x8, x0, x2, lsl #2
; CHECK-NEXT:    ldr q0, [x4]
; CHECK-NEXT:    str x8, [x1]
; CHECK-NEXT:    mov.s v0[1], v1[0]
; CHECK-NEXT:    ret
  %tmp1 = load float, ptr %bar
  store <4 x float> %vec, ptr %dep_ptr_1, align 16
  %A = load <4 x float>, ptr %dep_ptr_2, align 16
  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  ret <4 x float> %tmp2
}

; Make sure that we test the narrow V64 code path.
; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
; widened to 128-bit before the LD1LANEpost combine has the chance to run,
; making it avoid narrow vector types.
; One way to trick that combine into running early is to force the vector ops
; legalizer to run.  We achieve that using the ctpop.
; PR23265
; The ctpop operates on an unrelated <2 x i32> loaded from and stored back to
; %d; the expected SelectionDAG output still forms the register post-indexed
; ld1.h for the lane insert.
define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A, ptr %d) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ldr d1, [x3]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    cnt.8b v1, v1
; CHECK-SD-NEXT:    uaddlp.4h v1, v1
; CHECK-SD-NEXT:    uaddlp.2s v1, v1
; CHECK-SD-NEXT:    str d1, [x3]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ldr d1, [x3]
; CHECK-GI-NEXT:    cnt.8b v1, v1
; CHECK-GI-NEXT:    uaddlp.4h v1, v1
; CHECK-GI-NEXT:    uaddlp.2s v1, v1
; CHECK-GI-NEXT:    str d1, [x3]
; CHECK-GI-NEXT:    ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
  store ptr %tmp3, ptr %ptr
  %dl =  load <2 x i32>,  ptr %d
  %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
  store <2 x i32> %dr, ptr %d
  ret <4 x i16> %tmp2
}

; Population-count intrinsic called by
; test_v4i16_post_reg_ld1lane_forced_narrow.
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)

; Builds two <2 x i32> vectors, each from two independent scalar loads,
; subtracts the second from the first and stores the result to %out.
; Expected output: SelectionDAG builds each vector with a scalar ldr for lane
; 0 and an ld1.s lane load for lane 1; GlobalISel loads all four scalars with
; ldr and combines them with lane moves.
define void @test_ld1lane_build(ptr %ptr0, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %out) {
; CHECK-SD-LABEL: test_ld1lane_build:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x2]
; CHECK-SD-NEXT:    ldr s1, [x0]
; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x3]
; CHECK-SD-NEXT:    ld1.s { v1 }[1], [x1]
; CHECK-SD-NEXT:    sub.2s v0, v1, v0
; CHECK-SD-NEXT:    str d0, [x4]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_ld1lane_build:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr s0, [x0]
; CHECK-GI-NEXT:    ldr s1, [x1]
; CHECK-GI-NEXT:    ldr s2, [x2]
; CHECK-GI-NEXT:    ldr s3, [x3]
; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
; CHECK-GI-NEXT:    mov.s v2[1], v3[0]
; CHECK-GI-NEXT:    sub.2s v0, v0, v2
; CHECK-GI-NEXT:    str d0, [x4]
; CHECK-GI-NEXT:    ret
  %load0 = load i32, ptr %ptr0, align 4
  %load1 = load i32, ptr %ptr1, align 4
  %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
  %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1

  %load2 = load i32, ptr %ptr2, align 4
  %load3 = load i32, ptr %ptr3, align 4
  %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
  %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1

  %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
  store <2 x i32> %sub, ptr %out, align 16
  ret void
}

; Builds a <4 x i16> from four independent i16 loads, subtracts %e from it and
; stores the result to %p.
; Expected output: SelectionDAG uses a scalar ldr for lane 0 and ld1.h lane
; loads for lanes 1-3; GlobalISel loads each scalar with ldr and inserts it
; with a lane move.
define void  @test_ld1lane_build_i16(ptr %a, ptr %b, ptr %c, ptr %d, <4 x i16> %e, ptr %p) {
; CHECK-SD-LABEL: test_ld1lane_build_i16:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr h1, [x0]
; CHECK-SD-NEXT:    ld1.h { v1 }[1], [x1]
; CHECK-SD-NEXT:    ld1.h { v1 }[2], [x2]
; CHECK-SD-NEXT:    ld1.h { v1 }[3], [x3]
; CHECK-SD-NEXT:    sub.4h v0, v1, v0
; CHECK-SD-NEXT:    str d0, [x4]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_ld1lane_build_i16:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr h1, [x0]
; CHECK-GI-NEXT:    ldr h2, [x1]
; CHECK-GI-NEXT:    mov.h v1[1], v2[0]
; CHECK-GI-NEXT:    ldr h2, [x2]
; CHECK-GI-NEXT:    mov.h v1[2], v2[0]
; CHECK-GI-NEXT:    ldr h2, [x3]
; CHECK-GI-NEXT:    mov.h v1[3], v2[0]
; CHECK-GI-NEXT:    sub.4h v0, v1, v0
; CHECK-GI-NEXT:    str d0, [x4]
; CHECK-GI-NEXT:    ret
  %ld.a = load i16, ptr %a
  %ld.b = load i16, ptr %b
  %ld.c = load i16, ptr %c
  %ld.d = load i16, ptr %d
  %v.a = insertelement <4 x i16> undef, i16 %ld.a, i64 0
  %v.b = insertelement <4 x i16> %v.a, i16 %ld.b, i64 1
  %v.c = insertelement <4 x i16> %v.b, i16 %ld.c, i64 2
  %v = insertelement <4 x i16> %v.c, i16 %ld.d, i64 3
  %sub = sub nsw <4 x i16> %v, %e
  store <4 x i16> %sub, ptr %p
  ret void
}

; Builds a <4 x half> from four independent half loads, subtracts %e from it
; and stores the result to %p.
; Expected output: SelectionDAG uses a scalar ldr for lane 0 and ld1.h lane
; loads for lanes 1-3; GlobalISel loads each scalar with ldr and inserts it
; with a lane move. In both cases the fsub is expected to be performed in
; single precision (fcvtl on both operands, fsub.4s, then fcvtn).
define void  @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half> %e, ptr %p) {
; CHECK-SD-LABEL: test_ld1lane_build_half:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr h1, [x0]
; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-SD-NEXT:    ld1.h { v1 }[1], [x1]
; CHECK-SD-NEXT:    ld1.h { v1 }[2], [x2]
; CHECK-SD-NEXT:    ld1.h { v1 }[3], [x3]
; CHECK-SD-NEXT:    fcvtl v1.4s, v1.4h
; CHECK-SD-NEXT:    fsub.4s v0, v1, v0
; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT:    str d0, [x4]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_ld1lane_build_half:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr h1, [x0]
; CHECK-GI-NEXT:    ldr h2, [x1]
; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-GI-NEXT:    mov.h v1[1], v2[0]
; CHECK-GI-NEXT:    ldr h2, [x2]
; CHECK-GI-NEXT:    mov.h v1[2], v2[0]
; CHECK-GI-NEXT:    ldr h2, [x3]
; CHECK-GI-NEXT:    mov.h v1[3], v2[0]
; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
; CHECK-GI-NEXT:    fsub.4s v0, v1, v0
; CHECK-GI-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-GI-NEXT:    str d0, [x4]
; CHECK-GI-NEXT:    ret
  %ld.a = load half, ptr %a
  %ld.b = load half, ptr %b
  %ld.c = load half, ptr %c
  %ld.d = load half, ptr %d
  %v.a = insertelement <4 x half> undef, half %ld.a, i64 0
  %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1
  %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2
  %v = insertelement <4 x half> %v.c, half %ld.d, i64 3
  %sub = fsub <4 x half> %v, %e
  store <4 x half> %sub, ptr %p
  ret void
}

; Builds an <8 x i8> from eight independent i8 loads, subtracts %v from it and
; stores the result to %p.
; Expected output: SelectionDAG uses a scalar ldr for lane 0 and ld1.b lane
; loads for lanes 1-7; GlobalISel loads each scalar with ldr and inserts it
; with a lane move. In both cases the store address is first loaded from [sp].
define void  @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
; CHECK-SD-LABEL: test_ld1lane_build_i8:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ldr b1, [x0]
; CHECK-SD-NEXT:    ldr x8, [sp]
; CHECK-SD-NEXT:    ld1.b { v1 }[1], [x1]
; CHECK-SD-NEXT:    ld1.b { v1 }[2], [x2]
; CHECK-SD-NEXT:    ld1.b { v1 }[3], [x3]
; CHECK-SD-NEXT:    ld1.b { v1 }[4], [x4]
; CHECK-SD-NEXT:    ld1.b { v1 }[5], [x5]
; CHECK-SD-NEXT:    ld1.b { v1 }[6], [x6]
; CHECK-SD-NEXT:    ld1.b { v1 }[7], [x7]
; CHECK-SD-NEXT:    sub.8b v0, v1, v0
; CHECK-SD-NEXT:    str d0, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_ld1lane_build_i8:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ldr b1, [x0]
; CHECK-GI-NEXT:    ldr b2, [x1]
; CHECK-GI-NEXT:    ldr x8, [sp]
; CHECK-GI-NEXT:    mov.b v1[1], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x2]
; CHECK-GI-NEXT:    mov.b v1[2], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x3]
; CHECK-GI-NEXT:    mov.b v1[3], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x4]
; CHECK-GI-NEXT:    mov.b v1[4], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x5]
; CHECK-GI-NEXT:    mov.b v1[5], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x6]
; CHECK-GI-NEXT:    mov.b v1[6], v2[0]
; CHECK-GI-NEXT:    ldr b2, [x7]
; CHECK-GI-NEXT:    mov.b v1[7], v2[0]
; CHECK-GI-NEXT:    sub.8b v0, v1, v0
; CHECK-GI-NEXT:    str d0, [x8]
; CHECK-GI-NEXT:    ret
  %ld.a = load i8, ptr %a
  %ld.b = load i8, ptr %b
  %ld.c = load i8, ptr %c
  %ld.d = load i8, ptr %d
  %ld.e = load i8, ptr %e
  %ld.f = load i8, ptr %f
  %ld.g = load i8, ptr %g
  %ld.h = load i8, ptr %h
  %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0
  %v.b = insertelement <8 x i8> %v.a,  i8 %ld.b, i64 1
  %v.c = insertelement <8 x i8> %v.b,  i8 %ld.c, i64 2
  %v.d = insertelement <8 x i8> %v.c,  i8 %ld.d, i64 3
  %v.e = insertelement <8 x i8> %v.d,  i8 %ld.e, i64 4
  %v.f = insertelement <8 x i8> %v.e,  i8 %ld.f, i64 5
  %v.g = insertelement <8 x i8> %v.f,  i8 %ld.g, i64 6
  %v1 = insertelement <8 x i8> %v.g,  i8 %ld.h, i64 7
  %sub = sub nsw <8 x i8> %v1, %v
  store <8 x i8> %sub, ptr %p
  ret void
}

; The address increment is derived from the vector produced by the lane insert
; (bitcast to <2 x i64>, then extract lane 0), so the increment depends on the
; loaded value itself. The expected output for both instruction selectors
; keeps a non-writeback ld1.s and computes the new address with fmov + add.
; NOTE(review): presumably folding the increment into the load would create a
; dependency cycle, hence the test name -- confirm against the combine.
define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
; CHECK-LABEL: test_inc_cycle:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
; CHECK-NEXT:    adrp x9, _var@PAGE
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    add x8, x0, x8, lsl #2
; CHECK-NEXT:    str x8, [x9, _var@PAGEOFF]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %in
  %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0

  ; %inc cannot be %elt directly because we check that the load is only
  ; used by the insert before trying to form post-inc.
  %inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
  %inc = extractelement <2 x i64> %inc.vec, i32 0
  %newaddr = getelementptr i32, ptr %in, i64 %inc
  store ptr %newaddr, ptr @var

  ret <4 x i32> %newvec
}

; Pointer-typed global that test_inc_cycle stores its computed address to.
@var = global ptr null

; Loads a <16 x i8> from %A and extracts the element at runtime index %idx.
; Expected output: SelectionDAG copies the vector to a 16-byte stack slot and
; loads the byte from there, inserting the low 4 bits of the index into the
; slot address with bfxil; GlobalISel masks the index with 0xf and loads the
; byte directly from %A.
define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
; CHECK-SD-LABEL: load_single_extract_variable_index_i8:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldrb w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: load_single_extract_variable_index_i8:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov w8, w1
; CHECK-GI-NEXT:    and x8, x8, #0xf
; CHECK-GI-NEXT:    ldrb w0, [x0, x8]
; CHECK-GI-NEXT:    ret
  %lv = load <16 x i8>, ptr %A
  %e = extractelement <16 x i8> %lv, i32 %idx
  ret i8 %e
}

; Loads an <8 x i16> from %A and extracts the element at runtime index %idx.
; Expected output: SelectionDAG copies the vector to a 16-byte stack slot and
; loads the halfword from there, inserting 3 index bits at bit 1 of the slot
; address with bfi; GlobalISel masks the index with 0x7 and loads the halfword
; directly from %A with a scaled register offset.
define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
; CHECK-SD-LABEL: load_single_extract_variable_index_i16:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    ldr q0, [x0]
; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldrh w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: load_single_extract_variable_index_i16:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov w8, w1
; CHECK-GI-NEXT:    and x8, x8, #0x7
; CHECK-GI-NEXT:    ldrh w0, [x0, x8, lsl #1]
; CHECK-GI-NEXT:    ret
  %lv = load <8 x i16>, ptr %A
  %e = extractelement <8 x i16> %lv, i32 %idx
  ret i16 %e
}

; Loads a <4 x i32> from %A and extracts the element at runtime index %idx.
; Expected output: both instruction selectors mask the index with 0x3 and load
; the word directly from %A with a scaled register offset; GlobalISel has an
; extra mov of the index register first.
define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
; CHECK-SD-LABEL: load_single_extract_variable_index_i32:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
; CHECK-SD-NEXT:    and x8, x1, #0x3
; CHECK-SD-NEXT:    ldr w0, [x0, x8, lsl #2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: load_single_extract_variable_index_i32:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    mov w8, w1
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    ldr w0, [x0, x8, lsl #2]
; CHECK-GI-NEXT:    ret
  %lv = load <4 x i32>, ptr %A
  %e = extractelement <4 x i32> %lv, i32 %idx
  ret i32 %e
}

; Loads a <3 x i32> from %A with an explicit alignment of 2 and extracts the
; element at runtime index %idx.
; Expected output (both instruction selectors): the index is clamped to at
; most 2 with cmp/csel, then a single scaled word load from %A is used.
define i32 @load_single_extract_variable_index_v3i32_small_align(ptr %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov w9, w1
; CHECK-NEXT:    mov w8, #2 ; =0x2
; CHECK-NEXT:    cmp x9, #2
; CHECK-NEXT:    csel x8, x9, x8, lo
; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
; CHECK-NEXT:    ret
  %lv = load <3 x i32>, ptr %A, align 2
  %e = extractelement <3 x i32> %lv, i32 %idx
  ret i32 %e
}

; Loads a <3 x i32> from %A with no explicit alignment and extracts the
; element at runtime index %idx.
; Expected output (both instruction selectors): the index is clamped to at
; most 2 with cmp/csel, then a single scaled word load from %A is used.
define i32 @load_single_extract_variable_index_v3i32_default_align(ptr %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov w9, w1
; CHECK-NEXT:    mov w8, #2 ; =0x2
; CHECK-NEXT:    cmp x9, #2
; CHECK-NEXT:    csel x8, x9, x8, lo
; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
; CHECK-NEXT:    ret
  %lv = load <3 x i32>, ptr %A
  %e = extractelement <3 x i32> %lv, i32 %idx
  ret i32 %e
}

; Loads a <3 x i32> from %A and extracts the element at constant index 2; the
; %idx argument is not used by the body.
; Expected output (both instruction selectors): a single word load at byte
; offset 8 from %A.
define i32 @load_single_extract_valid_const_index_v3i32(ptr %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_valid_const_index_v3i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr w0, [x0, #8]
; CHECK-NEXT:    ret
  %lv = load <3 x i32>, ptr %A
  %e = extractelement <3 x i32> %lv, i32 2
  ret i32 %e
}

; Loads a <4 x i32> from %A and extracts the element at %idx masked with 3 in
; the IR, so the index is always within the vector.
; Expected output (both instruction selectors): a 32-bit and with 0x3 followed
; by a single word load using a zero-extended, scaled register offset.
define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_masked_i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    and w8, w1, #0x3
; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
; CHECK-NEXT:    ret
  %idx.x = and i32 %idx, 3
  %lv = load <4 x i32>, ptr %A
  %e = extractelement <4 x i32> %lv, i32 %idx.x
  ret i32 %e
}

; Loads a <4 x i32> from %A and extracts the element at %idx masked with 1 in
; the IR, so only lanes 0 and 1 can be selected.
; Expected output (both instruction selectors): a 32-bit and with 0x1 followed
; by a single word load using a zero-extended, scaled register offset.
define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
; CHECK-LABEL: load_single_extract_variable_index_masked2_i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    and w8, w1, #0x1
; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
; CHECK-NEXT:    ret
  %idx.x = and i32 %idx, 1
  %lv = load <4 x i32>, ptr %A
  %e = extractelement <4 x i32> %lv, i32 %idx.x
  ret i32 %e
}