; llvm/llvm/test/CodeGen/AArch64/load-insert-undef.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s

; Scalar i8 load from %p placed into lane 0 of a poison <8 x i8>.
define <8 x i8> @loadv8i8(ptr %p) {
; CHECK-LABEL: loadv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0]
; CHECK-NEXT:    ret
  %elt = load i8, ptr %p
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}

; Scalar i8 load from %p placed into lane 0 of a poison <16 x i8>.
define <16 x i8> @loadv16i8(ptr %p) {
; CHECK-LABEL: loadv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0]
; CHECK-NEXT:    ret
  %elt = load i8, ptr %p
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}

; Scalar i16 load from %p placed into lane 0 of a poison <4 x i16>.
define <4 x i16> @loadv4i16(ptr %p) {
; CHECK-LABEL: loadv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %p
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}

; Scalar i16 load from %p placed into lane 0 of a poison <8 x i16>.
define <8 x i16> @loadv8i16(ptr %p) {
; CHECK-LABEL: loadv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %p
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}

; Scalar i32 load from %p placed into lane 0 of a poison <2 x i32>.
define <2 x i32> @loadv2i32(ptr %p) {
; CHECK-LABEL: loadv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %p
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}

; Scalar i32 load from %p placed into lane 0 of a poison <4 x i32>.
define <4 x i32> @loadv4i32(ptr %p) {
; CHECK-LABEL: loadv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %p
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}

; Scalar i64 load from %p placed into lane 0 of a poison <2 x i64>.
define <2 x i64> @loadv2i64(ptr %p) {
; CHECK-LABEL: loadv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %elt = load i64, ptr %p
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}


; Scalar half load from %p placed into lane 0 of a poison <4 x half>.
define <4 x half> @loadv4f16(ptr %p) {
; CHECK-LABEL: loadv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load half, ptr %p
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}

; Scalar half load from %p placed into lane 0 of a poison <8 x half>.
define <8 x half> @loadv8f16(ptr %p) {
; CHECK-LABEL: loadv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load half, ptr %p
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}

; Scalar bfloat load from %p placed into lane 0 of a poison <4 x bfloat>.
define <4 x bfloat> @loadv4bf16(ptr %p) {
; CHECK-LABEL: loadv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}

; Scalar bfloat load from %p placed into lane 0 of a poison <8 x bfloat>.
define <8 x bfloat> @loadv8bf16(ptr %p) {
; CHECK-LABEL: loadv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}

; Scalar float load from %p placed into lane 0 of a poison <2 x float>.
define <2 x float> @loadv2f32(ptr %p) {
; CHECK-LABEL: loadv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load float, ptr %p
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}

; Scalar float load from %p placed into lane 0 of a poison <4 x float>.
define <4 x float> @loadv4f32(ptr %p) {
; CHECK-LABEL: loadv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load float, ptr %p
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}

; Scalar double load from %p placed into lane 0 of a poison <2 x double>.
define <2 x double> @loadv2f64(ptr %p) {
; CHECK-LABEL: loadv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %elt = load double, ptr %p
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}


; Unscaled: loads at a constant byte offset of +1 (_offset) or -1 (_noffset) from %p

; i8 loaded from byte offset +1 off %p, placed into lane 0 of a poison <8 x i8>.
define <8 x i8> @loadv8i8_offset(ptr %p) {
; CHECK-LABEL: loadv8i8_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}

; i8 loaded from byte offset +1 off %p, placed into lane 0 of a poison <16 x i8>.
define <16 x i8> @loadv16i8_offset(ptr %p) {
; CHECK-LABEL: loadv16i8_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}

; i16 loaded from byte offset +1 off %p, placed into lane 0 of a poison <4 x i16>.
define <4 x i16> @loadv4i16_offset(ptr %p) {
; CHECK-LABEL: loadv4i16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}

; i16 loaded from byte offset +1 off %p, placed into lane 0 of a poison <8 x i16>.
define <8 x i16> @loadv8i16_offset(ptr %p) {
; CHECK-LABEL: loadv8i16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}

; i32 loaded from byte offset +1 off %p, placed into lane 0 of a poison <2 x i32>.
define <2 x i32> @loadv2i32_offset(ptr %p) {
; CHECK-LABEL: loadv2i32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}

; i32 loaded from byte offset +1 off %p, placed into lane 0 of a poison <4 x i32>.
define <4 x i32> @loadv4i32_offset(ptr %p) {
; CHECK-LABEL: loadv4i32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}

; i64 loaded from byte offset +1 off %p, placed into lane 0 of a poison <2 x i64>.
define <2 x i64> @loadv2i64_offset(ptr %p) {
; CHECK-LABEL: loadv2i64_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}


; half loaded from byte offset +1 off %p, placed into lane 0 of a poison <4 x half>.
define <4 x half> @loadv4f16_offset(ptr %p) {
; CHECK-LABEL: loadv4f16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}

; half loaded from byte offset +1 off %p, placed into lane 0 of a poison <8 x half>.
define <8 x half> @loadv8f16_offset(ptr %p) {
; CHECK-LABEL: loadv8f16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}

; bfloat loaded from byte offset +1 off %p, placed into lane 0 of a poison <4 x bfloat>.
define <4 x bfloat> @loadv4bf16_offset(ptr %p) {
; CHECK-LABEL: loadv4bf16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}

; bfloat loaded from byte offset +1 off %p, placed into lane 0 of a poison <8 x bfloat>.
define <8 x bfloat> @loadv8bf16_offset(ptr %p) {
; CHECK-LABEL: loadv8bf16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}

; float loaded from byte offset +1 off %p, placed into lane 0 of a poison <2 x float>.
define <2 x float> @loadv2f32_offset(ptr %p) {
; CHECK-LABEL: loadv2f32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}

; float loaded from byte offset +1 off %p, placed into lane 0 of a poison <4 x float>.
define <4 x float> @loadv4f32_offset(ptr %p) {
; CHECK-LABEL: loadv4f32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}

; double loaded from byte offset +1 off %p, placed into lane 0 of a poison <2 x double>.
define <2 x double> @loadv2f64_offset(ptr %p) {
; CHECK-LABEL: loadv2f64_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}


; i8 loaded from byte offset -1 off %p, placed into lane 0 of a poison <8 x i8>.
define <8 x i8> @loadv8i8_noffset(ptr %p) {
; CHECK-LABEL: loadv8i8_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurb w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}

; i8 loaded from byte offset -1 off %p, placed into lane 0 of a poison <16 x i8>.
define <16 x i8> @loadv16i8_noffset(ptr %p) {
; CHECK-LABEL: loadv16i8_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurb w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}

; i16 loaded from byte offset -1 off %p, placed into lane 0 of a poison <4 x i16>.
define <4 x i16> @loadv4i16_noffset(ptr %p) {
; CHECK-LABEL: loadv4i16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}

; i16 loaded from byte offset -1 off %p, placed into lane 0 of a poison <8 x i16>.
define <8 x i16> @loadv8i16_noffset(ptr %p) {
; CHECK-LABEL: loadv8i16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}

; i32 loaded from byte offset -1 off %p, placed into lane 0 of a poison <2 x i32>.
define <2 x i32> @loadv2i32_noffset(ptr %p) {
; CHECK-LABEL: loadv2i32_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}

; i32 loaded from byte offset -1 off %p, placed into lane 0 of a poison <4 x i32>.
define <4 x i32> @loadv4i32_noffset(ptr %p) {
; CHECK-LABEL: loadv4i32_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #-1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}

; i64 loaded from byte offset -1 off %p, placed into lane 0 of a poison <2 x i64>.
define <2 x i64> @loadv2i64_noffset(ptr %p) {
; CHECK-LABEL: loadv2i64_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #-1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}


; half loaded from byte offset -1 off %p, placed into lane 0 of a poison <4 x half>.
define <4 x half> @loadv4f16_noffset(ptr %p) {
; CHECK-LABEL: loadv4f16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}

; half loaded from byte offset -1 off %p, placed into lane 0 of a poison <8 x half>.
define <8 x half> @loadv8f16_noffset(ptr %p) {
; CHECK-LABEL: loadv8f16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}

; bfloat loaded from byte offset -1 off %p, placed into lane 0 of a poison <4 x bfloat>.
define <4 x bfloat> @loadv4bf16_noffset(ptr %p) {
; CHECK-LABEL: loadv4bf16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}

; bfloat loaded from byte offset -1 off %p, placed into lane 0 of a poison <8 x bfloat>.
define <8 x bfloat> @loadv8bf16_noffset(ptr %p) {
; CHECK-LABEL: loadv8bf16_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}

; float loaded from byte offset -1 off %p, placed into lane 0 of a poison <2 x float>.
define <2 x float> @loadv2f32_noffset(ptr %p) {
; CHECK-LABEL: loadv2f32_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}

; float loaded from byte offset -1 off %p, placed into lane 0 of a poison <4 x float>.
define <4 x float> @loadv4f32_noffset(ptr %p) {
; CHECK-LABEL: loadv4f32_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}

; double loaded from byte offset -1 off %p, placed into lane 0 of a poison <2 x double>.
define <2 x double> @loadv2f64_noffset(ptr %p) {
; CHECK-LABEL: loadv2f64_noffset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #-1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 -1
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}


; roW addressing modes (base pointer plus a sign-extended i32 index)

; i8 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <8 x i8>.
define <8 x i8> @loadv8i8_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8i8_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i32 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}

; i8 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <16 x i8>.
define <16 x i8> @loadv16i8_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv16i8_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, w1, sxtw]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i32 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}

; i16 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <4 x i16>.
define <4 x i16> @loadv4i16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4i16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i16, ptr %p, i32 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}

; i16 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <8 x i16>.
define <8 x i16> @loadv8i16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8i16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i16, ptr %p, i32 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}

; i32 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <2 x i32>.
define <2 x i32> @loadv2i32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2i32_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i32, ptr %p, i32 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}

; i32 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <4 x i32>.
define <4 x i32> @loadv4i32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4i32_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i32, ptr %p, i32 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}

; i64 loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <2 x i64>.
define <2 x i64> @loadv2i64_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2i64_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i64, ptr %p, i32 %o
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}

; half loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <4 x half>.
define <4 x half> @loadv4f16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4f16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds half, ptr %p, i32 %o
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}

; half loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <8 x half>.
define <8 x half> @loadv8f16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8f16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds half, ptr %p, i32 %o
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}

; bfloat loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <4 x bfloat>.
define <4 x bfloat> @loadv4bf16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4bf16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds bfloat, ptr %p, i32 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}

; bfloat loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <8 x bfloat>.
define <8 x bfloat> @loadv8bf16_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv8bf16_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, w1, sxtw #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds bfloat, ptr %p, i32 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}

; float loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <2 x float>.
define <2 x float> @loadv2f32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2f32_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds float, ptr %p, i32 %o
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}

; float loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <4 x float>.
define <4 x float> @loadv4f32_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv4f32_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, w1, sxtw #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds float, ptr %p, i32 %o
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}

; double loaded from %p indexed by the i32 %o, placed into lane 0 of a poison <2 x double>.
define <2 x double> @loadv2f64_roW(ptr %p, i32 %o) {
; CHECK-LABEL: loadv2f64_roW:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0, w1, sxtw #3]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds double, ptr %p, i32 %o
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}

; roX addressing modes (base pointer plus an i64 index)

; i8 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <8 x i8>.
define <8 x i8> @loadv8i8_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8i8_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <8 x i8> poison, i8 %elt, i32 0
  ret <8 x i8> %vec
}

; i8 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <16 x i8>.
define <16 x i8> @loadv16i8_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv16i8_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 %o
  %elt = load i8, ptr %addr
  %vec = insertelement <16 x i8> poison, i8 %elt, i32 0
  ret <16 x i8> %vec
}

; i16 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <4 x i16>.
define <4 x i16> @loadv4i16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4i16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i16, ptr %p, i64 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <4 x i16> poison, i16 %elt, i32 0
  ret <4 x i16> %vec
}

; i16 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <8 x i16>.
define <8 x i16> @loadv8i16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8i16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i16, ptr %p, i64 %o
  %elt = load i16, ptr %addr
  %vec = insertelement <8 x i16> poison, i16 %elt, i32 0
  ret <8 x i16> %vec
}

; i32 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <2 x i32>.
define <2 x i32> @loadv2i32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2i32_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i32, ptr %p, i64 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <2 x i32> poison, i32 %elt, i32 0
  ret <2 x i32> %vec
}

; i32 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <4 x i32>.
define <4 x i32> @loadv4i32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4i32_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i32, ptr %p, i64 %o
  %elt = load i32, ptr %addr
  %vec = insertelement <4 x i32> poison, i32 %elt, i32 0
  ret <4 x i32> %vec
}

; i64 loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <2 x i64>.
define <2 x i64> @loadv2i64_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2i64_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i64, ptr %p, i64 %o
  %elt = load i64, ptr %addr
  %vec = insertelement <2 x i64> poison, i64 %elt, i32 0
  ret <2 x i64> %vec
}

; half loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <4 x half>.
define <4 x half> @loadv4f16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4f16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds half, ptr %p, i64 %o
  %elt = load half, ptr %addr
  %vec = insertelement <4 x half> poison, half %elt, i32 0
  ret <4 x half> %vec
}

; half loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <8 x half>.
define <8 x half> @loadv8f16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8f16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds half, ptr %p, i64 %o
  %elt = load half, ptr %addr
  %vec = insertelement <8 x half> poison, half %elt, i32 0
  ret <8 x half> %vec
}

; bfloat loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <4 x bfloat>.
define <4 x bfloat> @loadv4bf16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4bf16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds bfloat, ptr %p, i64 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <4 x bfloat> poison, bfloat %elt, i32 0
  ret <4 x bfloat> %vec
}

; bfloat loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <8 x bfloat>.
define <8 x bfloat> @loadv8bf16_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv8bf16_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds bfloat, ptr %p, i64 %o
  %elt = load bfloat, ptr %addr
  %vec = insertelement <8 x bfloat> poison, bfloat %elt, i32 0
  ret <8 x bfloat> %vec
}

; float loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <2 x float>.
define <2 x float> @loadv2f32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2f32_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds float, ptr %p, i64 %o
  %elt = load float, ptr %addr
  %vec = insertelement <2 x float> poison, float %elt, i32 0
  ret <2 x float> %vec
}

; float loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <4 x float>.
define <4 x float> @loadv4f32_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv4f32_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds float, ptr %p, i64 %o
  %elt = load float, ptr %addr
  %vec = insertelement <4 x float> poison, float %elt, i32 0
  ret <4 x float> %vec
}

; double loaded from %p indexed by the i64 %o, placed into lane 0 of a poison <2 x double>.
define <2 x double> @loadv2f64_roX(ptr %p, i64 %o) {
; CHECK-LABEL: loadv2f64_roX:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds double, ptr %p, i64 %o
  %elt = load double, ptr %addr
  %vec = insertelement <2 x double> poison, double %elt, i32 0
  ret <2 x double> %vec
}


; SVE (scalable-vector result types)

; Scalar i8 load from %p placed into lane 0 of a poison <vscale x 8 x i8>.
define <vscale x 8 x i8> @loadnxv8i8(ptr %p) {
; CHECK-LABEL: loadnxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = load i8, ptr %p
  %vec = insertelement <vscale x 8 x i8> poison, i8 %elt, i32 0
  ret <vscale x 8 x i8> %vec
}

; Scalar i8 load from %p placed into lane 0 of a poison <vscale x 16 x i8>.
define <vscale x 16 x i8> @loadnxv16i8(ptr %p) {
; CHECK-LABEL: loadnxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = load i8, ptr %p
  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
  ret <vscale x 16 x i8> %vec
}

; Scalar i16 load from %p placed into lane 0 of a poison <vscale x 4 x i16>.
define <vscale x 4 x i16> @loadnxv4i16(ptr %p) {
; CHECK-LABEL: loadnxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = load i16, ptr %p
  %vec = insertelement <vscale x 4 x i16> poison, i16 %elt, i32 0
  ret <vscale x 4 x i16> %vec
}

; Scalar i16 load from %p placed into lane 0 of a poison <vscale x 8 x i16>.
define <vscale x 8 x i16> @loadnxv8i16(ptr %p) {
; CHECK-LABEL: loadnxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = load i16, ptr %p
  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
  ret <vscale x 8 x i16> %vec
}

; Scalar i32 load from %p placed into lane 0 of a poison <vscale x 2 x i32>.
define <vscale x 2 x i32> @loadnxv2i32(ptr %p) {
; CHECK-LABEL: loadnxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = load i32, ptr %p
  %vec = insertelement <vscale x 2 x i32> poison, i32 %elt, i32 0
  ret <vscale x 2 x i32> %vec
}

; Scalar i32 load from %p placed into lane 0 of a poison <vscale x 4 x i32>.
define <vscale x 4 x i32> @loadnxv4i32(ptr %p) {
; CHECK-LABEL: loadnxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = load i32, ptr %p
  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
  ret <vscale x 4 x i32> %vec
}

; Scalar i64 load from %p placed into lane 0 of a poison <vscale x 2 x i64>.
define <vscale x 2 x i64> @loadnxv2i64(ptr %p) {
; CHECK-LABEL: loadnxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = load i64, ptr %p
  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
  ret <vscale x 2 x i64> %vec
}


; Scalar half load from %p placed into lane 0 of a poison <vscale x 4 x half>.
define <vscale x 4 x half> @loadnxv4f16(ptr %p) {
; CHECK-LABEL: loadnxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load half, ptr %p
  %vec = insertelement <vscale x 4 x half> poison, half %elt, i32 0
  ret <vscale x 4 x half> %vec
}

; Scalar half load from %p placed into lane 0 of a poison <vscale x 8 x half>.
define <vscale x 8 x half> @loadnxv8f16(ptr %p) {
; CHECK-LABEL: loadnxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load half, ptr %p
  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
  ret <vscale x 8 x half> %vec
}

; Scalar bfloat load from %p placed into lane 0 of a poison <vscale x 4 x bfloat>.
define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) {
; CHECK-LABEL: loadnxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <vscale x 4 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 4 x bfloat> %vec
}

; Scalar bfloat load from %p placed into lane 0 of a poison <vscale x 8 x bfloat>.
define <vscale x 8 x bfloat> @loadnxv8bf16(ptr %p) {
; CHECK-LABEL: loadnxv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    ret
  %elt = load bfloat, ptr %p
  %vec = insertelement <vscale x 8 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 8 x bfloat> %vec
}

; Scalar float load from %p placed into lane 0 of a poison <vscale x 2 x float>.
define <vscale x 2 x float> @loadnxv2f32(ptr %p) {
; CHECK-LABEL: loadnxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load float, ptr %p
  %vec = insertelement <vscale x 2 x float> poison, float %elt, i32 0
  ret <vscale x 2 x float> %vec
}

; Scalar float load from %p placed into lane 0 of a poison <vscale x 4 x float>.
define <vscale x 4 x float> @loadnxv4f32(ptr %p) {
; CHECK-LABEL: loadnxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %elt = load float, ptr %p
  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
  ret <vscale x 4 x float> %vec
}

; Scalar double load from %p placed into lane 0 of a poison <vscale x 2 x double>.
define <vscale x 2 x double> @loadnxv2f64(ptr %p) {
; CHECK-LABEL: loadnxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %elt = load double, ptr %p
  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
  ret <vscale x 2 x double> %vec
}


; SVE, unscaled: loads at a constant byte offset of +1 from %p

; i8 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 8 x i8>.
define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) {
; CHECK-LABEL: loadnxv8i8_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <vscale x 8 x i8> poison, i8 %elt, i32 0
  ret <vscale x 8 x i8> %vec
}

; i8 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 16 x i8>.
define <vscale x 16 x i8> @loadnxv16i8_offset(ptr %p) {
; CHECK-LABEL: loadnxv16i8_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i8, ptr %addr
  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
  ret <vscale x 16 x i8> %vec
}

; i16 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 4 x i16>.
define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4i16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <vscale x 4 x i16> poison, i16 %elt, i32 0
  ret <vscale x 4 x i16> %vec
}

; i16 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 8 x i16>.
define <vscale x 8 x i16> @loadnxv8i16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8i16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldurh w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i16, ptr %addr
  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
  ret <vscale x 8 x i16> %vec
}

; i32 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 2 x i32>.
define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2i32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <vscale x 2 x i32> poison, i32 %elt, i32 0
  ret <vscale x 2 x i32> %vec
}

; i32 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 4 x i32>.
define <vscale x 4 x i32> @loadnxv4i32_offset(ptr %p) {
; CHECK-LABEL: loadnxv4i32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i32, ptr %addr
  %vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
  ret <vscale x 4 x i32> %vec
}

; i64 loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 2 x i64>.
define <vscale x 2 x i64> @loadnxv2i64_offset(ptr %p) {
; CHECK-LABEL: loadnxv2i64_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load i64, ptr %addr
  %vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
  ret <vscale x 2 x i64> %vec
}


; half loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 4 x half>.
define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4f16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <vscale x 4 x half> poison, half %elt, i32 0
  ret <vscale x 4 x half> %vec
}

; half loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 8 x half>.
define <vscale x 8 x half> @loadnxv8f16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8f16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load half, ptr %addr
  %vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
  ret <vscale x 8 x half> %vec
}

; bfloat loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 4 x bfloat>.
define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) {
; CHECK-LABEL: loadnxv4bf16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <vscale x 4 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 4 x bfloat> %vec
}

; bfloat loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 8 x bfloat>.
define <vscale x 8 x bfloat> @loadnxv8bf16_offset(ptr %p) {
; CHECK-LABEL: loadnxv8bf16_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load bfloat, ptr %addr
  %vec = insertelement <vscale x 8 x bfloat> poison, bfloat %elt, i32 0
  ret <vscale x 8 x bfloat> %vec
}

; float loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 2 x float>.
define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) {
; CHECK-LABEL: loadnxv2f32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <vscale x 2 x float> poison, float %elt, i32 0
  ret <vscale x 2 x float> %vec
}

; float loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 4 x float>.
define <vscale x 4 x float> @loadnxv4f32_offset(ptr %p) {
; CHECK-LABEL: loadnxv4f32_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load float, ptr %addr
  %vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
  ret <vscale x 4 x float> %vec
}

; double loaded from byte offset +1 off %p, placed into lane 0 of a poison <vscale x 2 x double>.
define <vscale x 2 x double> @loadnxv2f64_offset(ptr %p) {
; CHECK-LABEL: loadnxv2f64_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %addr = getelementptr inbounds i8, ptr %p, i64 1
  %elt = load double, ptr %addr
  %vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
  ret <vscale x 2 x double> %vec
}