; llvm/llvm/test/CodeGen/AArch64/nontemporal-load.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mattr=+sve -aarch64-enable-sink-fold=true < %s -mtriple aarch64-apple-darwin | FileCheck %s
; RUN: llc --mattr=+sve -aarch64-enable-sink-fold=true < %s -mtriple aarch64_be-unknown-unknown | FileCheck --check-prefix CHECK-BE %s

; 256-bit non-temporal load of <4 x double>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <4 x double> @test_ldnp_v4f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v4f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <4 x double>, ptr %A, align 8, !nontemporal !0
  ret <4 x double> %vec
}

; 256-bit non-temporal load of <4 x i64>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <4 x i64> @test_ldnp_v4i64(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4i64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <4 x i64>, ptr %A, align 8, !nontemporal !0
  ret <4 x i64> %vec
}

; 256-bit non-temporal load of <8 x i32>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <8 x i32> @test_ldnp_v8i32(ptr %A) {
; CHECK-LABEL: test_ldnp_v8i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v8i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <8 x i32>, ptr %A, align 8, !nontemporal !0
  ret <8 x i32> %vec
}

; 256-bit non-temporal load of <8 x float>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <8 x float> @test_ldnp_v8f32(ptr %A) {
; CHECK-LABEL: test_ldnp_v8f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v8f32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <8 x float>, ptr %A, align 8, !nontemporal !0
  ret <8 x float> %vec
}

; 256-bit non-temporal load of <16 x i16>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <16 x i16> @test_ldnp_v16i16(ptr %A) {
; CHECK-LABEL: test_ldnp_v16i16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x i16>, ptr %A, align 8, !nontemporal !0
  ret <16 x i16> %vec
}

; 256-bit non-temporal load of <16 x half>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <16 x half> @test_ldnp_v16f16(ptr %A) {
; CHECK-LABEL: test_ldnp_v16f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16f16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x half>, ptr %A, align 8, !nontemporal !0
  ret <16 x half> %vec
}

; 256-bit non-temporal load of <32 x i8>.
; Darwin (little-endian) expects one ldnp of q0/q1; aarch64_be expects a plain ldp.
define <32 x i8> @test_ldnp_v32i8(ptr %A) {
; CHECK-LABEL: test_ldnp_v32i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v32i8:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <32 x i8>, ptr %A, align 8, !nontemporal !0
  ret <32 x i8> %vec
}

; 128-bit non-temporal load of <4 x i32>.
; Both targets expect an ordinary `ldr q0`; no ldnp is expected at this width.
define <4 x i32> @test_ldnp_v4i32(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr q0, [x0]
; CHECK-BE-NEXT:    ret
  %lv = load <4 x i32>, ptr %A, align 8, !nontemporal !0
  ret <4 x i32> %lv
}

; 128-bit non-temporal load of <4 x float>.
; Both targets expect an ordinary `ldr q0`; no ldnp is expected at this width.
define <4 x float> @test_ldnp_v4f32(ptr %A) {
; CHECK-LABEL: test_ldnp_v4f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4f32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr q0, [x0]
; CHECK-BE-NEXT:    ret
  %lv = load <4 x float>, ptr %A, align 8, !nontemporal !0
  ret <4 x float> %lv
}

; 128-bit non-temporal load of <8 x i16>.
; Both targets expect an ordinary `ldr q0`; no ldnp is expected at this width.
define <8 x i16> @test_ldnp_v8i16(ptr %A) {
; CHECK-LABEL: test_ldnp_v8i16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v8i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr q0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <8 x i16>, ptr %A, align 8, !nontemporal !0
  ret <8 x i16> %vec
}

; 128-bit non-temporal load of <16 x i8>.
; Both targets expect an ordinary `ldr q0`; no ldnp is expected at this width.
define <16 x i8> @test_ldnp_v16i8(ptr %A) {
; CHECK-LABEL: test_ldnp_v16i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16i8:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr q0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x i8>, ptr %A, align 8, !nontemporal !0
  ret <16 x i8> %vec
}

; 128-bit non-temporal load of <2 x double>.
; Both targets expect an ordinary `ldr q0`; no ldnp is expected at this width.
define <2 x double> @test_ldnp_v2f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v2f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v2f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr q0, [x0]
; CHECK-BE-NEXT:    ret
  %lv = load <2 x double>, ptr %A, align 8, !nontemporal !0
  ret <2 x double> %lv
}

; 64-bit non-temporal load of <2 x i32>.
; Both targets expect an ordinary `ldr d0`.
define <2 x i32> @test_ldnp_v2i32(ptr %A) {
; CHECK-LABEL: test_ldnp_v2i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v2i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <2 x i32>, ptr %A, align 8, !nontemporal !0
  ret <2 x i32> %vec
}

; 64-bit non-temporal load of <2 x float>.
; Both targets expect an ordinary `ldr d0`.
define <2 x float> @test_ldnp_v2f32(ptr %A) {
; CHECK-LABEL: test_ldnp_v2f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v2f32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <2 x float>, ptr %A, align 8, !nontemporal !0
  ret <2 x float> %vec
}

; 64-bit non-temporal load of <4 x i16>.
; Both targets expect an ordinary `ldr d0`.
define <4 x i16> @test_ldnp_v4i16(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <4 x i16>, ptr %A, align 8, !nontemporal !0
  ret <4 x i16> %vec
}

; 64-bit non-temporal load of <8 x i8>.
; Both targets expect an ordinary `ldr d0`.
define <8 x i8> @test_ldnp_v8i8(ptr %A) {
; CHECK-LABEL: test_ldnp_v8i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v8i8:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <8 x i8>, ptr %A, align 8, !nontemporal !0
  ret <8 x i8> %vec
}

; 64-bit non-temporal load of the single-element vector <1 x double>.
; Both targets expect an ordinary `ldr d0`.
define <1 x double> @test_ldnp_v1f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v1f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v1f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <1 x double>, ptr %A, align 8, !nontemporal !0
  ret <1 x double> %vec
}

; 64-bit non-temporal load of the single-element vector <1 x i64>.
; Both targets expect an ordinary `ldr d0`.
define <1 x i64> @test_ldnp_v1i64(ptr %A) {
; CHECK-LABEL: test_ldnp_v1i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v1i64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr d0, [x0]
; CHECK-BE-NEXT:    ret
  %vec = load <1 x i64>, ptr %A, align 8, !nontemporal !0
  ret <1 x i64> %vec
}

; 512-bit non-temporal load of <32 x i16>.
; Darwin (little-endian) expects two ldnp pairs (offsets 0 and 32); aarch64_be expects two ldp.
define <32 x i16> @test_ldnp_v32i16(ptr %A) {
; CHECK-LABEL: test_ldnp_v32i16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v32i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ret
  %vec = load <32 x i16>, ptr %A, align 8, !nontemporal !0
  ret <32 x i16> %vec
}

; 512-bit non-temporal load of <32 x half>.
; Darwin (little-endian) expects two ldnp pairs (offsets 0 and 32); aarch64_be expects two ldp.
define <32 x half> @test_ldnp_v32f16(ptr %A) {
; CHECK-LABEL: test_ldnp_v32f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v32f16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ret
  %vec = load <32 x half>, ptr %A, align 8, !nontemporal !0
  ret <32 x half> %vec
}

; 512-bit non-temporal load of <16 x i32>.
; Darwin (little-endian) expects two ldnp pairs (offsets 0 and 32); aarch64_be expects two ldp.
define <16 x i32> @test_ldnp_v16i32(ptr %A) {
; CHECK-LABEL: test_ldnp_v16i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x i32>, ptr %A, align 8, !nontemporal !0
  ret <16 x i32> %vec
}

; 512-bit non-temporal load of <16 x float>.
; Darwin (little-endian) expects two ldnp pairs (offsets 0 and 32); aarch64_be expects two ldp.
define <16 x float> @test_ldnp_v16f32(ptr %A) {
; CHECK-LABEL: test_ldnp_v16f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16f32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x float>, ptr %A, align 8, !nontemporal !0
  ret <16 x float> %vec
}

; Non-temporal load of <17 x float>, an odd element count (68 bytes).
; Darwin (little-endian) expects two ldnp pairs plus `ldr s2` for the last
; element, with the result stored through x8. aarch64_be expects ld1/st1 per
; 16-byte piece plus ldr/str of s1, and no ldnp.
define <17 x float> @test_ldnp_v17f32(ptr %A) {
; CHECK-LABEL: test_ldnp_v17f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0, #32]
; CHECK-NEXT:    ldr s2, [x0, #64]
; CHECK-NEXT:    ldnp q3, q4, [x0]
; CHECK-NEXT:    stp q0, q1, [x8, #32]
; CHECK-NEXT:    stp q3, q4, [x8]
; CHECK-NEXT:    str s2, [x8, #64]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v17f32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    add x9, x0, #48
; CHECK-BE-NEXT:    ld1 { v0.4s }, [x0]
; CHECK-BE-NEXT:    add x10, x0, #32
; CHECK-BE-NEXT:    ld1 { v2.4s }, [x9]
; CHECK-BE-NEXT:    add x9, x0, #16
; CHECK-BE-NEXT:    ldr s1, [x0, #64]
; CHECK-BE-NEXT:    ld1 { v3.4s }, [x9]
; CHECK-BE-NEXT:    ld1 { v4.4s }, [x10]
; CHECK-BE-NEXT:    add x9, x8, #48
; CHECK-BE-NEXT:    str s1, [x8, #64]
; CHECK-BE-NEXT:    add x10, x8, #32
; CHECK-BE-NEXT:    st1 { v0.4s }, [x8]
; CHECK-BE-NEXT:    add x8, x8, #16
; CHECK-BE-NEXT:    st1 { v2.4s }, [x9]
; CHECK-BE-NEXT:    st1 { v4.4s }, [x10]
; CHECK-BE-NEXT:    st1 { v3.4s }, [x8]
; CHECK-BE-NEXT:    ret
  %vec = load <17 x float>, ptr %A, align 8, !nontemporal !0
  ret <17 x float> %vec
}

; Non-temporal load of <33 x double>, an odd element count (264 bytes).
; Darwin (little-endian) expects eight ldnp pairs plus `ldr d20` for the last
; element, with the result stored through x8. aarch64_be expects a ld1/st1
; sequence per 16-byte piece plus ldr/str of d21, and no ldnp.
define <33 x double> @test_ldnp_v33f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v33f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldr d20, [x0, #256]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ldnp q4, q5, [x0, #64]
; CHECK-NEXT:    ldnp q6, q7, [x0, #96]
; CHECK-NEXT:    ldnp q16, q17, [x0, #128]
; CHECK-NEXT:    ldnp q18, q19, [x0, #224]
; CHECK-NEXT:    ldnp q21, q22, [x0, #160]
; CHECK-NEXT:    ldnp q23, q24, [x0, #192]
; CHECK-NEXT:    stp q0, q1, [x8]
; CHECK-NEXT:    stp q2, q3, [x8, #32]
; CHECK-NEXT:    stp q4, q5, [x8, #64]
; CHECK-NEXT:    stp q6, q7, [x8, #96]
; CHECK-NEXT:    stp q16, q17, [x8, #128]
; CHECK-NEXT:    stp q21, q22, [x8, #160]
; CHECK-NEXT:    stp q23, q24, [x8, #192]
; CHECK-NEXT:    stp q18, q19, [x8, #224]
; CHECK-NEXT:    str d20, [x8, #256]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v33f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    add x9, x0, #16
; CHECK-BE-NEXT:    add x10, x0, #32
; CHECK-BE-NEXT:    add x11, x0, #48
; CHECK-BE-NEXT:    ld1 { v0.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x0, #64
; CHECK-BE-NEXT:    ld1 { v1.2d }, [x10]
; CHECK-BE-NEXT:    add x10, x0, #80
; CHECK-BE-NEXT:    ld1 { v3.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x0, #112
; CHECK-BE-NEXT:    ld1 { v4.2d }, [x10]
; CHECK-BE-NEXT:    add x10, x0, #128
; CHECK-BE-NEXT:    ld1 { v6.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x0, #160
; CHECK-BE-NEXT:    ld1 { v7.2d }, [x10]
; CHECK-BE-NEXT:    add x10, x0, #176
; CHECK-BE-NEXT:    ld1 { v17.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x0, #240
; CHECK-BE-NEXT:    ld1 { v2.2d }, [x11]
; CHECK-BE-NEXT:    add x11, x0, #96
; CHECK-BE-NEXT:    ld1 { v18.2d }, [x10]
; CHECK-BE-NEXT:    ld1 { v20.2d }, [x0]
; CHECK-BE-NEXT:    ld1 { v22.2d }, [x9]
; CHECK-BE-NEXT:    add x10, x0, #224
; CHECK-BE-NEXT:    ld1 { v5.2d }, [x11]
; CHECK-BE-NEXT:    add x11, x0, #144
; CHECK-BE-NEXT:    ldr d21, [x0, #256]
; CHECK-BE-NEXT:    add x9, x0, #208
; CHECK-BE-NEXT:    ld1 { v24.2d }, [x10]
; CHECK-BE-NEXT:    ld1 { v16.2d }, [x11]
; CHECK-BE-NEXT:    add x11, x0, #192
; CHECK-BE-NEXT:    ld1 { v23.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #240
; CHECK-BE-NEXT:    ld1 { v19.2d }, [x11]
; CHECK-BE-NEXT:    str d21, [x8, #256]
; CHECK-BE-NEXT:    st1 { v20.2d }, [x8]
; CHECK-BE-NEXT:    st1 { v22.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #224
; CHECK-BE-NEXT:    st1 { v24.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #208
; CHECK-BE-NEXT:    st1 { v23.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #192
; CHECK-BE-NEXT:    st1 { v19.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #176
; CHECK-BE-NEXT:    st1 { v18.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #160
; CHECK-BE-NEXT:    st1 { v17.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #144
; CHECK-BE-NEXT:    st1 { v16.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #128
; CHECK-BE-NEXT:    st1 { v7.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #112
; CHECK-BE-NEXT:    st1 { v6.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #96
; CHECK-BE-NEXT:    st1 { v5.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #80
; CHECK-BE-NEXT:    st1 { v4.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #64
; CHECK-BE-NEXT:    st1 { v3.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #48
; CHECK-BE-NEXT:    st1 { v2.2d }, [x9]
; CHECK-BE-NEXT:    add x9, x8, #32
; CHECK-BE-NEXT:    add x8, x8, #16
; CHECK-BE-NEXT:    st1 { v1.2d }, [x9]
; CHECK-BE-NEXT:    st1 { v0.2d }, [x8]
; CHECK-BE-NEXT:    ret
  %vec = load <33 x double>, ptr %A, align 8, !nontemporal !0
  ret <33 x double> %vec
}

; Non-temporal load of <33 x i8>, an odd element count (33 bytes).
; Darwin (little-endian) expects one ldnp pair plus `ldr b2` for the last
; byte, with the result stored through x8. aarch64_be expects ld1/st1 per
; 16-byte piece plus ldrb/strb, and no ldnp.
define <33 x i8> @test_ldnp_v33i8(ptr %A) {
; CHECK-LABEL: test_ldnp_v33i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    add x9, x8, #32
; CHECK-NEXT:    ldr b2, [x0, #32]
; CHECK-NEXT:    stp q0, q1, [x8]
; CHECK-NEXT:    st1.b { v2 }[0], [x9]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v33i8:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    add x9, x0, #16
; CHECK-BE-NEXT:    ld1 { v0.16b }, [x0]
; CHECK-BE-NEXT:    ldrb w10, [x0, #32]
; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT:    strb w10, [x8, #32]
; CHECK-BE-NEXT:    st1 { v0.16b }, [x8]
; CHECK-BE-NEXT:    add x8, x8, #16
; CHECK-BE-NEXT:    st1 { v1.16b }, [x8]
; CHECK-BE-NEXT:    ret
  %lv = load <33 x i8>, ptr %A, align 8, !nontemporal !0
  ret <33 x i8> %lv
}

; Non-temporal load of <4 x i65>, whose element width is not a power of two.
; Neither target is expected to use ldnp: both load through general-purpose
; registers (ldp/ldrb) and unpack the elements with extr/ubfx/and-style
; bit operations.
define <4 x i65> @test_ldnp_v4i65(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i65:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldp x8, x9, [x0, #16]
; CHECK-NEXT:    ldrb w11, [x0, #32]
; CHECK-NEXT:    ldp x0, x10, [x0]
; CHECK-NEXT:    ubfx x7, x11, #3, #1
; CHECK-NEXT:    extr x4, x9, x8, #2
; CHECK-NEXT:    extr x6, x11, x9, #3
; CHECK-NEXT:    ubfx x3, x8, #1, #1
; CHECK-NEXT:    extr x2, x8, x10, #1
; CHECK-NEXT:    ubfx x5, x9, #2, #1
; CHECK-NEXT:    and x1, x10, #0x1
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4i65:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp x9, x8, [x0]
; CHECK-BE-NEXT:    ldrb w12, [x0, #32]
; CHECK-BE-NEXT:    ldp x10, x11, [x0, #16]
; CHECK-BE-NEXT:    extr x13, x9, x8, #56
; CHECK-BE-NEXT:    lsr x14, x9, #56
; CHECK-BE-NEXT:    lsr x16, x8, #56
; CHECK-BE-NEXT:    extr x15, x8, x10, #56
; CHECK-BE-NEXT:    orr x7, x12, x11, lsl #8
; CHECK-BE-NEXT:    extr x11, x10, x11, #56
; CHECK-BE-NEXT:    lsr x12, x10, #56
; CHECK-BE-NEXT:    extr x1, x14, x13, #3
; CHECK-BE-NEXT:    lsr x0, x9, #59
; CHECK-BE-NEXT:    ubfx x2, x8, #58, #1
; CHECK-BE-NEXT:    ubfx x4, x10, #57, #1
; CHECK-BE-NEXT:    extr x3, x16, x15, #2
; CHECK-BE-NEXT:    extr x5, x12, x11, #1
; CHECK-BE-NEXT:    and x6, x11, #0x1
; CHECK-BE-NEXT:    ret
  %vec = load <4 x i65>, ptr %A, align 8, !nontemporal !0
  ret <4 x i65> %vec
}

; Non-temporal load of <4 x i63>, whose element width is not a power of two.
; Neither target is expected to use ldnp: both load through general-purpose
; registers (ldp/ldr) and unpack the elements with extr plus a 63-bit mask.
define <4 x i63> @test_ldnp_v4i63(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i63:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldp x8, x9, [x0, #16]
; CHECK-NEXT:    ldp x10, x11, [x0]
; CHECK-NEXT:    extr x3, x9, x8, #61
; CHECK-NEXT:    extr x9, x11, x10, #63
; CHECK-NEXT:    extr x8, x8, x11, #62
; CHECK-NEXT:    and x0, x10, #0x7fffffffffffffff
; CHECK-NEXT:    and x1, x9, #0x7fffffffffffffff
; CHECK-NEXT:    and x2, x8, #0x7fffffffffffffff
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v4i63:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp x9, x8, [x0, #8]
; CHECK-BE-NEXT:    ldr x11, [x0, #24]
; CHECK-BE-NEXT:    ldr x10, [x0]
; CHECK-BE-NEXT:    and x3, x11, #0x7fffffffffffffff
; CHECK-BE-NEXT:    extr x12, x9, x8, #62
; CHECK-BE-NEXT:    extr x8, x8, x11, #63
; CHECK-BE-NEXT:    extr x0, x10, x9, #61
; CHECK-BE-NEXT:    and x1, x12, #0x7fffffffffffffff
; CHECK-BE-NEXT:    and x2, x8, #0x7fffffffffffffff
; CHECK-BE-NEXT:    ret
  %vec = load <4 x i63>, ptr %A, align 8, !nontemporal !0
  ret <4 x i63> %vec
}

; Non-temporal load of <5 x double>, an odd element count (40 bytes).
; Darwin (little-endian) expects one ldnp into q0/q2 plus `ldr d4` for the
; last element, with ext splitting each q register into two d results.
; aarch64_be expects ld1 loads plus `ldr d4`, and no ldnp.
define <5 x double> @test_ldnp_v5f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v5f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q2, [x0]
; CHECK-NEXT:    ldr d4, [x0, #32]
; CHECK-NEXT:    ext.16b v1, v0, v0, #8
; CHECK-NEXT:    ext.16b v3, v2, v2, #8
; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q2
; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q1
; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q3
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v5f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    add x8, x0, #16
; CHECK-BE-NEXT:    ld1 { v0.2d }, [x0]
; CHECK-BE-NEXT:    ldr d4, [x0, #32]
; CHECK-BE-NEXT:    ld1 { v2.2d }, [x8]
; CHECK-BE-NEXT:    // kill: def $d4 killed $d4 killed $q4
; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-BE-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
; CHECK-BE-NEXT:    // kill: def $d1 killed $d1 killed $q1
; CHECK-BE-NEXT:    // kill: def $d2 killed $d2 killed $q2
; CHECK-BE-NEXT:    // kill: def $d3 killed $d3 killed $q3
; CHECK-BE-NEXT:    ret
  %lv = load <5 x double>, ptr %A, align 8, !nontemporal !0
  ret <5 x double> %lv
}

; 1024-bit non-temporal load of <16 x i64>.
; Darwin (little-endian) expects four ldnp pairs (offsets 0, 32, 64, 96);
; aarch64_be expects four plain ldp pairs.
define <16 x i64> @test_ldnp_v16i64(ptr %A) {
; CHECK-LABEL: test_ldnp_v16i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ldnp q4, q5, [x0, #64]
; CHECK-NEXT:    ldnp q6, q7, [x0, #96]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16i64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ldp q4, q5, [x0, #64]
; CHECK-BE-NEXT:    ldp q6, q7, [x0, #96]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x i64>, ptr %A, align 8, !nontemporal !0
  ret <16 x i64> %vec
}

; 1024-bit non-temporal load of <16 x double>.
; Darwin (little-endian) expects four ldnp pairs (offsets 0, 32, 64, 96);
; aarch64_be expects four plain ldp pairs.
define <16 x double> @test_ldnp_v16f64(ptr %A) {
; CHECK-LABEL: test_ldnp_v16f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldnp q0, q1, [x0]
; CHECK-NEXT:    ldnp q2, q3, [x0, #32]
; CHECK-NEXT:    ldnp q4, q5, [x0, #64]
; CHECK-NEXT:    ldnp q6, q7, [x0, #96]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v16f64:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldp q0, q1, [x0]
; CHECK-BE-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-BE-NEXT:    ldp q4, q5, [x0, #64]
; CHECK-BE-NEXT:    ldp q6, q7, [x0, #96]
; CHECK-BE-NEXT:    ret
  %vec = load <16 x double>, ptr %A, align 8, !nontemporal !0
  ret <16 x double> %vec
}

; Non-temporal load of the scalable vector <vscale x 20 x float>.
; Both targets expect the same code: five predicated SVE ld1w loads
; (z0-z4) under an all-true predicate, with no ldnp.
define <vscale x 20 x float> @test_ldnp_v20f32_vscale(ptr %A) {
; CHECK-LABEL: test_ldnp_v20f32_vscale:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ret
;
; CHECK-BE-LABEL: test_ldnp_v20f32_vscale:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ptrue p0.s
; CHECK-BE-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-BE-NEXT:    ld1w { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-BE-NEXT:    ld1w { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-BE-NEXT:    ld1w { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-BE-NEXT:    ld1w { z4.s }, p0/z, [x0, #4, mul vl]
; CHECK-BE-NEXT:    ret
  %lv = load <vscale x 20 x float>, ptr %A, align 8, !nontemporal !0
  ret <vscale x 20 x float> %lv
}

; Metadata node referenced by the !nontemporal attachment on the loads above.
!0 = !{i32 1}