; llvm/llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE

; Loads <4 x i32> from %x+4 (align 4), stores it to %y, and returns %x+4.
; Expected assembly folds the +4 into a pre-indexed vldrw.u32 with writeback.
define ptr @ldrwu32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x+3, stores it to %y, and returns %x+3.
; Little-endian expects a pre-indexed vldrb.u8 with writeback; big-endian
; expects a separate adds followed by a plain vldrw.u32.
define ptr @ldrwu32_3(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrwu32_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrwu32_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x-4, stores it to %y, and returns %x-4.
; Expected assembly folds the -4 into a pre-indexed vldrw.u32 with writeback.
define ptr @ldrwu32_m4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #-4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -4
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x+508, stores it to %y, and returns %x+508.
; Expected assembly still folds +508 into a pre-indexed vldrw.u32.
define ptr @ldrwu32_508(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #508]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 508
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x+512, stores it to %y, and returns %x+512.
; Expected assembly does not fold +512: a separate add.w updates r0 and the
; load is a plain vldrw.u32 (presumably 512 is outside the encodable range).
define ptr @ldrwu32_512(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 512
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x-508, stores it to %y, and returns %x-508.
; Expected assembly still folds -508 into a pre-indexed vldrw.u32.
define ptr @ldrwu32_m508(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #-508]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -508
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x-512, stores it to %y, and returns %x-512.
; Expected assembly does not fold -512: a separate sub.w updates r0 and the
; load is a plain vldrw.u32 (presumably -512 is outside the encodable range).
define ptr @ldrwu32_m512(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwu32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -512
  %0 = load <4 x i32>, ptr %z, align 4
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}


; Loads <4 x i16> from %x+4, zero-extends to <4 x i32>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrh.u32 with writeback.
define ptr @ldrhu32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+3, zero-extends to <4 x i32>, stores to %y, and
; returns %x+3. Expected: the odd offset is not folded; a separate adds is
; followed by a plain vldrh.u32.
define ptr @ldrhu32_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+2, zero-extends to <4 x i32>, stores to %y, and
; returns %x+2. Expected: pre-indexed vldrh.u32 with writeback.
define ptr @ldrhu32_2(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #2]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+254, zero-extends to <4 x i32>, stores to %y, and
; returns %x+254. Expected: +254 still folds into a pre-indexed vldrh.u32.
define ptr @ldrhu32_254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #254]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+256, zero-extends to <4 x i32>, stores to %y, and
; returns %x+256. Expected: +256 is not folded; a separate add.w is followed
; by a plain vldrh.u32 (presumably 256 is outside the encodable range).
define ptr @ldrhu32_256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x-254, zero-extends to <4 x i32>, stores to %y, and
; returns %x-254. Expected: -254 still folds into a pre-indexed vldrh.u32.
define ptr @ldrhu32_m254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0, #-254]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x-256, zero-extends to <4 x i32>, stores to %y, and
; returns %x-256. Expected: -256 is not folded; a separate sub.w is followed
; by a plain vldrh.u32 (presumably -256 is outside the encodable range).
define ptr @ldrhu32_m256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}


; Loads <4 x i16> from %x+4, sign-extends to <4 x i32>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrh.s32 with writeback.
define ptr @ldrhs32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+3, sign-extends to <4 x i32>, stores to %y, and
; returns %x+3. Expected: the odd offset is not folded; a separate adds is
; followed by a plain vldrh.s32.
define ptr @ldrhs32_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+2, sign-extends to <4 x i32>, stores to %y, and
; returns %x+2. Expected: pre-indexed vldrh.s32 with writeback.
define ptr @ldrhs32_2(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #2]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+254, sign-extends to <4 x i32>, stores to %y, and
; returns %x+254. Expected: +254 still folds into a pre-indexed vldrh.s32.
define ptr @ldrhs32_254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #254]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x+256, sign-extends to <4 x i32>, stores to %y, and
; returns %x+256. Expected: +256 is not folded; a separate add.w is followed
; by a plain vldrh.s32 (presumably 256 is outside the encodable range).
define ptr @ldrhs32_256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x-254, sign-extends to <4 x i32>, stores to %y, and
; returns %x-254. Expected: -254 still folds into a pre-indexed vldrh.s32.
define ptr @ldrhs32_m254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0, #-254]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i16> from %x-256, sign-extends to <4 x i32>, stores to %y, and
; returns %x-256. Expected: -256 is not folded; a separate sub.w is followed
; by a plain vldrh.s32 (presumably -256 is outside the encodable range).
define ptr @ldrhs32_m256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhs32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %0 = load <4 x i16>, ptr %z, align 2
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}


; Loads <8 x i16> from %x+4 (align 2), stores it to %y, and returns %x+4.
; Expected: pre-indexed vldrh.u16 with writeback.
define ptr @ldrhu16_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x+3, stores it to %y, and returns %x+3.
; Little-endian expects a pre-indexed vldrb.u8 with writeback; big-endian
; expects a separate adds followed by a plain vldrh.u16.
define ptr @ldrhu16_3(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrhu16_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhu16_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x+2, stores it to %y, and returns %x+2.
; Expected: pre-indexed vldrh.u16 with writeback.
define ptr @ldrhu16_2(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #2]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x+254, stores it to %y, and returns %x+254.
; Expected: +254 still folds into a pre-indexed vldrh.u16.
define ptr @ldrhu16_254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #254]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x+256, stores it to %y, and returns %x+256.
; Expected: +256 is not folded; a separate add.w is followed by a plain
; vldrh.u16 (presumably 256 is outside the encodable range).
define ptr @ldrhu16_256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x-254, stores it to %y, and returns %x-254.
; Expected: -254 still folds into a pre-indexed vldrh.u16.
define ptr @ldrhu16_m254(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #-254]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i16> from %x-256, stores it to %y, and returns %x-256.
; Expected: -256 is not folded; a separate sub.w is followed by a plain
; vldrh.u16 (presumably -256 is outside the encodable range).
define ptr @ldrhu16_m256(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhu16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %0 = load <8 x i16>, ptr %z, align 2
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}


; Loads <4 x i8> from %x+4, zero-extends to <4 x i32>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrb.u32 with writeback.
define ptr @ldrbu32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+3, zero-extends to <4 x i32>, stores to %y, and
; returns %x+3. Expected: the odd offset folds into a pre-indexed vldrb.u32.
define ptr @ldrbu32_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #3]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+127, zero-extends to <4 x i32>, stores to %y, and
; returns %x+127. Expected: +127 still folds into a pre-indexed vldrb.u32.
define ptr @ldrbu32_127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #127]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+128, zero-extends to <4 x i32>, stores to %y, and
; returns %x+128. Expected: +128 is not folded; a separate adds is followed
; by a plain vldrb.u32 (presumably 128 is outside the encodable range).
define ptr @ldrbu32_128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x-127, zero-extends to <4 x i32>, stores to %y, and
; returns %x-127. Expected: -127 still folds into a pre-indexed vldrb.u32.
define ptr @ldrbu32_m127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0, #-127]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x-128, zero-extends to <4 x i32>, stores to %y, and
; returns %x-128. Expected: -128 is not folded; a separate subs is followed
; by a plain vldrb.u32 (presumably -128 is outside the encodable range).
define ptr @ldrbu32_m128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}


; Loads <4 x i8> from %x+4, sign-extends to <4 x i32>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrb.s32 with writeback.
define ptr @ldrbs32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+3, sign-extends to <4 x i32>, stores to %y, and
; returns %x+3. Expected: the odd offset folds into a pre-indexed vldrb.s32.
define ptr @ldrbs32_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #3]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+127, sign-extends to <4 x i32>, stores to %y, and
; returns %x+127. Expected: +127 still folds into a pre-indexed vldrb.s32.
define ptr @ldrbs32_127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #127]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x+128, sign-extends to <4 x i32>, stores to %y, and
; returns %x+128. Expected: +128 is not folded; a separate adds is followed
; by a plain vldrb.s32 (presumably 128 is outside the encodable range).
define ptr @ldrbs32_128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x-127, sign-extends to <4 x i32>, stores to %y, and
; returns %x-127. Expected: -127 still folds into a pre-indexed vldrb.s32.
define ptr @ldrbs32_m127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0, #-127]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Loads <4 x i8> from %x-128, sign-extends to <4 x i32>, stores to %y, and
; returns %x-128. Expected: -128 is not folded; a separate subs is followed
; by a plain vldrb.s32 (presumably -128 is outside the encodable range).
define ptr @ldrbs32_m128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %0 = load <4 x i8>, ptr %z, align 1
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}


; Loads <8 x i8> from %x+4, zero-extends to <8 x i16>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrb.u16 with writeback.
define ptr @ldrbu16_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #4]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+3, zero-extends to <8 x i16>, stores to %y, and
; returns %x+3. Expected: the odd offset folds into a pre-indexed vldrb.u16.
define ptr @ldrbu16_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #3]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+127, zero-extends to <8 x i16>, stores to %y, and
; returns %x+127. Expected: +127 still folds into a pre-indexed vldrb.u16.
define ptr @ldrbu16_127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #127]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+128, zero-extends to <8 x i16>, stores to %y, and
; returns %x+128. Expected: +128 is not folded; a separate adds is followed
; by a plain vldrb.u16 (presumably 128 is outside the encodable range).
define ptr @ldrbu16_128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x-127, zero-extends to <8 x i16>, stores to %y, and
; returns %x-127. Expected: -127 still folds into a pre-indexed vldrb.u16.
define ptr @ldrbu16_m127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0, #-127]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x-128, zero-extends to <8 x i16>, stores to %y, and
; returns %x-128. Expected: -128 is not folded; a separate subs is followed
; by a plain vldrb.u16 (presumably -128 is outside the encodable range).
define ptr @ldrbu16_m128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}


; Loads <8 x i8> from %x+4, sign-extends to <8 x i16>, stores to %y, and
; returns %x+4. Expected: pre-indexed widening vldrb.s16 with writeback.
define ptr @ldrbs16_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #4]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+3, sign-extends to <8 x i16>, stores to %y, and
; returns %x+3. Expected: the odd offset folds into a pre-indexed vldrb.s16.
define ptr @ldrbs16_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #3]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+127, sign-extends to <8 x i16>, stores to %y, and
; returns %x+127. Expected: +127 still folds into a pre-indexed vldrb.s16.
define ptr @ldrbs16_127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #127]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x+128, sign-extends to <8 x i16>, stores to %y, and
; returns %x+128. Expected: +128 is not folded; a separate adds is followed
; by a plain vldrb.s16 (presumably 128 is outside the encodable range).
define ptr @ldrbs16_128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x-127, sign-extends to <8 x i16>, stores to %y, and
; returns %x-127. Expected: -127 still folds into a pre-indexed vldrb.s16.
define ptr @ldrbs16_m127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0, #-127]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}

; Loads <8 x i8> from %x-128, sign-extends to <8 x i16>, stores to %y, and
; returns %x-128. Expected: -128 is not folded; a separate subs is followed
; by a plain vldrb.s16 (presumably -128 is outside the encodable range).
define ptr @ldrbs16_m128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbs16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %0 = load <8 x i8>, ptr %z, align 1
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %z
}


; Loads <16 x i8> from %x+4 (align 1), stores it to %y, and returns %x+4.
; Expected: pre-indexed vldrb.u8 with writeback.
define ptr @ldrbu8_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #4]!
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}

; Loads <16 x i8> from %x+3, stores it to %y, and returns %x+3.
; Expected: the odd offset folds into a pre-indexed vldrb.u8.
define ptr @ldrbu8_3(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}

; Loads <16 x i8> from %x+127, stores it to %y, and returns %x+127.
; Expected: +127 still folds into a pre-indexed vldrb.u8.
define ptr @ldrbu8_127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #127]!
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}

; Loads <16 x i8> from %x+128, stores it to %y, and returns %x+128.
; Expected: +128 is not folded; a separate adds is followed by a plain
; vldrb.u8 (presumably 128 is outside the encodable range).
define ptr @ldrbu8_128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}

; Loads <16 x i8> from %x-127, stores it to %y, and returns %x-127.
; Expected: -127 still folds into a pre-indexed vldrb.u8.
define ptr @ldrbu8_m127(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0, #-127]!
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}

; Loads <16 x i8> from %x-128, stores it to %y, and returns %x-128.
; Expected: -128 is not folded; a separate subs is followed by a plain
; vldrb.u8 (presumably -128 is outside the encodable range).
define ptr @ldrbu8_m128(ptr %x, ptr %y) {
; CHECK-LABEL: ldrbu8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    vstrb.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %0 = load <16 x i8>, ptr %z, align 1
  store <16 x i8> %0, ptr %y, align 1
  ret ptr %z
}


; Floating-point variant: loads <4 x float> from %x+4 (align 4), stores it to
; %y, and returns %x+4. Expected: pre-indexed vldrw.u32 with writeback.
define ptr @ldrwf32_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <4 x float>, ptr %z, align 4
  store <4 x float> %0, ptr %y, align 4
  ret ptr %z
}

; Half-precision variant: loads <8 x half> from %x+4 (align 2), stores it to
; %y, and returns %x+4. Expected: pre-indexed vldrh.u16 with writeback.
define ptr @ldrwf16_4(ptr %x, ptr %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <8 x half>, ptr %z, align 2
  store <8 x half> %0, ptr %y, align 2
  ret ptr %z
}

; Under-aligned case: loads <4 x i32> from %x+3 with align 1, stores it to %y,
; and returns %x+3. Little-endian expects a pre-indexed vldrb.u8 with
; writeback; big-endian expects an offset vldrb.u8 without writeback, a
; separate adds, and a vrev32.8 before the store.
define ptr @ldrwi32_align1(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i32>, ptr %z, align 1
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %z
}

; Under-aligned case: loads <8 x i16> from %x+3 with align 1, stores it to %y,
; and returns %x+3. Little-endian expects a pre-indexed vldrb.u8 with
; writeback; big-endian expects an offset vldrb.u8 without writeback, a
; separate adds, and a vrev16.8 before the store.
define ptr @ldrhi16_align1(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <8 x i16>, ptr %z, align 1
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}

; Under-aligned widening case: loads <4 x i16> from %x+3 with align 1,
; sign-extends to <4 x i32>, stores to %y, and returns %x+3.
; Expected assembly copies the 8 source bytes into an 8-byte stack slot with
; two scalar ldr/str pairs (the first ldr pre-indexes r0 by 3 with writeback)
; and then performs the widening vldrh.s32 from the stack copy.
define ptr @ldrhi32_align1(ptr %x, ptr %y) {
; CHECK-LABEL: ldrhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr r2, [r0, #3]!
; CHECK-NEXT:    str r2, [sp]
; CHECK-NEXT:    ldr r2, [r0, #4]
; CHECK-NEXT:    str r2, [sp, #4]
; CHECK-NEXT:    mov r2, sp
; CHECK-NEXT:    vldrh.s32 q0, [r2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x i16>, ptr %z, align 1
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %z
}

; Under-aligned float case: loads <4 x float> from %x+3 with align 1, stores
; it to %y, and returns %x+3. Little-endian expects a pre-indexed vldrb.u8
; with writeback; big-endian expects an offset vldrb.u8 without writeback, a
; separate adds, and a vrev32.8 before the store.
define ptr @ldrf32_align1(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <4 x float>, ptr %z, align 1
  store <4 x float> %0, ptr %y, align 4
  ret ptr %z
}

; Under-aligned half case: loads <8 x half> from %x+3 with align 1, stores it
; to %y, and returns %x+3. Little-endian expects a pre-indexed vldrb.u8 with
; writeback; big-endian expects an offset vldrb.u8 without writeback, a
; separate adds, and a vrev16.8 before the store.
define ptr @ldrf16_align1(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0, #3]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %0 = load <8 x half>, ptr %z, align 1
  store <8 x half> %0, ptr %y, align 2
  ret ptr %z
}

; Over-aligned case: loads <8 x i16> from %x+4 with align 8, stores it to %y
; (align 2), and returns %x+4. Little-endian expects a pre-indexed vldrw.u32;
; big-endian expects a pre-indexed vldrh.u16. Both use writeback.
define ptr @ldrh16_align8(ptr %x, ptr %y) {
; CHECK-LE-LABEL: ldrh16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #4]!
; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: ldrh16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0, #4]!
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %0 = load <8 x i16>, ptr %z, align 8
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %z
}





; Store side: loads <4 x i32> from %x, stores it to %y+4 (align 4), and
; returns %y+4. Little-endian expects a pre-indexed vstrb.8 with writeback;
; big-endian expects a pre-indexed vstrw.32 with writeback.
define ptr @strw32_4(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strw32_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y+3, and returns %y+3.
; Little-endian expects a pre-indexed vstrb.8 with writeback; big-endian
; expects a separate adds followed by a plain vstrw.32.
define ptr @strw32_3(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strw32_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y-4, and returns %y-4.
; Little-endian expects a pre-indexed vstrb.8 with writeback; big-endian
; expects a pre-indexed vstrw.32 with writeback.
define ptr @strw32_m4(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strw32_m4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #-4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strw32_m4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #-4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -4
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y+508, and returns %y+508.
; Expected: +508 still folds into a pre-indexed vstrw.32 with writeback.
define ptr @strw32_508(ptr %y, ptr %x) {
; CHECK-LABEL: strw32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #508]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 508
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y+512, and returns %y+512.
; Expected: +512 is not folded; a separate add.w updates r0 and the store is
; a plain vstrw.32 (presumably 512 is outside the encodable range).
define ptr @strw32_512(ptr %y, ptr %x) {
; CHECK-LABEL: strw32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 512
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y-508, and returns %y-508.
; Expected: -508 still folds into a pre-indexed vstrw.32 with writeback.
define ptr @strw32_m508(ptr %y, ptr %x) {
; CHECK-LABEL: strw32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-508]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -508
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}

; Loads <4 x i32> from %x, stores it to %y-512, and returns %y-512.
; Expected: -512 is not folded; a separate sub.w updates r0 and the store is
; a plain vstrw.32 (presumably -512 is outside the encodable range).
define ptr @strw32_m512(ptr %y, ptr %x) {
; CHECK-LABEL: strw32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #512
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -512
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 4
  ret ptr %z
}


; Loads <4 x i16> from %x, stores it to %y+4 (align 2), and returns %y+4.
; Expected: vldrh.u32 load, then a pre-indexed vstrh.32 with writeback.
define ptr @strh32_4(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Loads <4 x i16> from %x, stores it to %y+3, and returns %y+3.
; Expected: the odd offset is not folded; a separate adds is followed by a
; plain vstrh.32.
define ptr @strh32_3(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #3
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Loads <4 x i16> from %x, stores it to %y+2, and returns %y+2.
; Expected: pre-indexed vstrh.32 with writeback.
define ptr @strh32_2(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #2]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Loads <4 x i16> from %x, stores it to %y+254, and returns %y+254.
; Expected: +254 still folds into a pre-indexed vstrh.32 with writeback.
define ptr @strh32_254(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 254
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a <4 x i16> from %x to %y + 256 and returns %y + 256.
; Expected output: r0 is advanced by a separate add.w and the vstrh.32 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably +256 is just past the largest pre-indexed immediate
; for the halfword store - confirm against the MVE VSTRH encoding.
define ptr @strh32_256(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 256
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a <4 x i16> from %x to %y - 254 and returns %y - 254.
; Expected output: the negative offset is folded into a pre-indexed vstrh.32
; with writeback ("[r0, #-254]!").
define ptr @strh32_m254(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, #-254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -254
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a <4 x i16> from %x to %y - 256 and returns %y - 256.
; Expected output: r0 is adjusted by a separate sub.w and the vstrh.32 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably -256 is just past the most negative pre-indexed
; immediate for the halfword store - confirm against the MVE VSTRH encoding.
define ptr @strh32_m256(ptr %y, ptr %x) {
; CHECK-LABEL: strh32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -256
  %0 = load <4 x i16>, ptr %x, align 2
  store <4 x i16> %0, ptr %z, align 2
  ret ptr %z
}


; Copies a full <8 x i16> vector (2-byte aligned) from %x to %y + 4 and
; returns %y + 4.
; Expected output differs by endianness: the little-endian run stores with a
; pre-indexed byte store (vstrb.8, "[r0, #4]!"), the big-endian run with a
; pre-indexed halfword store (vstrh.16, "[r0, #4]!").
define ptr @strh16_4(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strh16_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y + 3 and returns %y + 3.
; Expected output differs by endianness: the little-endian run still folds the
; odd offset into a pre-indexed byte store (vstrb.8, "[r0, #3]!"), while the
; big-endian run advances r0 with a separate adds and uses vstrh.16 on plain
; [r0].
; NOTE(review): presumably the odd offset is not encodable for the halfword
; store the big-endian run uses - confirm against the MVE VSTRH encoding.
define ptr @strh16_3(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strh16_3:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_3:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y + 2 and returns %y + 2.
; Expected output: both runs fold the offset into a pre-indexed store with
; writeback ("[r0, #2]!"); little-endian uses vstrb.8, big-endian vstrh.16.
define ptr @strh16_2(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strh16_2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #2]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strh16_2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #2]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y + 254 and returns %y + 254.
; Expected output (same for both endiannesses): a pre-indexed vstrh.16 with
; writeback ("[r0, #254]!").
; NOTE(review): unlike the small-offset variants, little-endian does not use a
; byte store here; presumably 254 does not fit the byte store's immediate -
; confirm.
define ptr @strh16_254(ptr %y, ptr %x) {
; CHECK-LABEL: strh16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 254
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y + 256 and returns %y + 256.
; Expected output (same for both endiannesses): r0 is advanced by a separate
; add.w and the vstrh.16 stores through plain [r0] with no writeback.
; NOTE(review): presumably +256 exceeds the pre-indexed immediate range of the
; halfword store - confirm against the MVE VSTRH encoding.
define ptr @strh16_256(ptr %y, ptr %x) {
; CHECK-LABEL: strh16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 256
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y - 254 and returns %y - 254.
; Expected output (same for both endiannesses): a pre-indexed vstrh.16 with
; writeback ("[r0, #-254]!").
define ptr @strh16_m254(ptr %y, ptr %x) {
; CHECK-LABEL: strh16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0, #-254]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -254
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}

; Copies a full <8 x i16> vector from %x to %y - 256 and returns %y - 256.
; Expected output (same for both endiannesses): r0 is adjusted by a separate
; sub.w and the vstrh.16 stores through plain [r0] with no writeback.
; NOTE(review): presumably -256 is below the pre-indexed immediate range of the
; halfword store - confirm against the MVE VSTRH encoding.
define ptr @strh16_m256(ptr %y, ptr %x) {
; CHECK-LABEL: strh16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    sub.w r0, r0, #256
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -256
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 2
  ret ptr %z
}


; Copies a <4 x i8> (1-byte aligned) from %x to %y + 4 and returns %y + 4.
; Expected output: the offset is folded into a pre-indexed vstrb.32 with
; writeback ("[r0, #4]!").
define ptr @strb32_4(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a <4 x i8> from %x to %y + 3 and returns %y + 3.
; Expected output: the odd offset is folded into a pre-indexed vstrb.32 with
; writeback ("[r0, #3]!").
define ptr @strb32_3(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a <4 x i8> from %x to %y + 127 and returns %y + 127.
; Expected output: the offset is folded into a pre-indexed vstrb.32 with
; writeback ("[r0, #127]!").
define ptr @strb32_127(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a <4 x i8> from %x to %y + 128 and returns %y + 128.
; Expected output: r0 is advanced by a separate adds and the vstrb.32 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably +128 is just past the largest pre-indexed immediate
; for the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb32_128(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a <4 x i8> from %x to %y - 127 and returns %y - 127.
; Expected output: the negative offset is folded into a pre-indexed vstrb.32
; with writeback ("[r0, #-127]!").
define ptr @strb32_m127(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a <4 x i8> from %x to %y - 128 and returns %y - 128.
; Expected output: r0 is adjusted by a separate subs and the vstrb.32 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably -128 is just below the pre-indexed immediate range
; of the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb32_m128(ptr %y, ptr %x) {
; CHECK-LABEL: strb32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %0 = load <4 x i8>, ptr %x, align 1
  store <4 x i8> %0, ptr %z, align 1
  ret ptr %z
}


; Copies an <8 x i8> (1-byte aligned) from %x to %y + 4 and returns %y + 4.
; Expected output: the offset is folded into a pre-indexed vstrb.16 with
; writeback ("[r0, #4]!").
define ptr @strb16_4(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies an <8 x i8> from %x to %y + 3 and returns %y + 3.
; Expected output: the odd offset is folded into a pre-indexed vstrb.16 with
; writeback ("[r0, #3]!").
define ptr @strb16_3(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies an <8 x i8> from %x to %y + 127 and returns %y + 127.
; Expected output: the offset is folded into a pre-indexed vstrb.16 with
; writeback ("[r0, #127]!").
define ptr @strb16_127(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies an <8 x i8> from %x to %y + 128 and returns %y + 128.
; Expected output: r0 is advanced by a separate adds and the vstrb.16 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably +128 is just past the largest pre-indexed immediate
; for the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb16_128(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies an <8 x i8> from %x to %y - 127 and returns %y - 127.
; Expected output: the negative offset is folded into a pre-indexed vstrb.16
; with writeback ("[r0, #-127]!").
define ptr @strb16_m127(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies an <8 x i8> from %x to %y - 128 and returns %y - 128.
; Expected output: r0 is adjusted by a separate subs and the vstrb.16 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably -128 is just below the pre-indexed immediate range
; of the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb16_m128(ptr %y, ptr %x) {
; CHECK-LABEL: strb16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %0 = load <8 x i8>, ptr %x, align 1
  store <8 x i8> %0, ptr %z, align 1
  ret ptr %z
}


; Copies a full <16 x i8> vector (1-byte aligned) from %x to %y + 4 and
; returns %y + 4.
; Expected output: the offset is folded into a pre-indexed vstrb.8 with
; writeback ("[r0, #4]!").
define ptr @strb8_4(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a full <16 x i8> vector from %x to %y + 3 and returns %y + 3.
; Expected output: the odd offset is folded into a pre-indexed vstrb.8 with
; writeback ("[r0, #3]!").
define ptr @strb8_3(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a full <16 x i8> vector from %x to %y + 127 and returns %y + 127.
; Expected output: the offset is folded into a pre-indexed vstrb.8 with
; writeback ("[r0, #127]!").
define ptr @strb8_127(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a full <16 x i8> vector from %x to %y + 128 and returns %y + 128.
; Expected output: r0 is advanced by a separate adds and the vstrb.8 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably +128 is just past the largest pre-indexed immediate
; for the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb8_128(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adds r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a full <16 x i8> vector from %x to %y - 127 and returns %y - 127.
; Expected output: the negative offset is folded into a pre-indexed vstrb.8
; with writeback ("[r0, #-127]!").
define ptr @strb8_m127(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0, #-127]!
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}

; Copies a full <16 x i8> vector from %x to %y - 128 and returns %y - 128.
; Expected output: r0 is adjusted by a separate subs and the vstrb.8 stores
; through plain [r0] with no writeback.
; NOTE(review): presumably -128 is just below the pre-indexed immediate range
; of the byte store - confirm against the MVE VSTRB encoding.
define ptr @strb8_m128(ptr %y, ptr %x) {
; CHECK-LABEL: strb8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    subs r0, #128
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %0 = load <16 x i8>, ptr %x, align 1
  store <16 x i8> %0, ptr %z, align 1
  ret ptr %z
}


; Floating-point variant: copies a <4 x float> (4-byte aligned) from %x to
; %y + 4 and returns %y + 4.
; Expected output: both runs fold the offset into a pre-indexed store with
; writeback ("[r0, #4]!"); little-endian uses vstrb.8, big-endian vstrw.32.
define ptr @strf32_4(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strf32_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf32_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x float>, ptr %x, align 4
  store <4 x float> %0, ptr %z, align 4
  ret ptr %z
}

; Half-precision variant: copies an <8 x half> (2-byte aligned) from %x to
; %y + 4 and returns %y + 4.
; Expected output: both runs fold the offset into a pre-indexed store with
; writeback ("[r0, #4]!"); little-endian uses vstrb.8, big-endian vstrh.16.
define ptr @strf16_4(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strf16_4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x half>, ptr %x, align 2
  store <8 x half> %0, ptr %z, align 2
  ret ptr %z
}

; Under-aligned store: loads a <4 x i32> (4-byte aligned) from %x and stores it
; to %y + 3 with only 1-byte alignment, returning %y + 3.
; Expected output, little-endian: a pre-indexed vstrb.8 with writeback
; ("[r0, #3]!"). Big-endian: the vector is first byte-reversed within each
; 32-bit lane (vrev32.8), stored with vstrb.8 at [r0, #3] without writeback,
; and r0 is then advanced by a separate adds.
define ptr @strwi32_align1(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strwi32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strwi32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x i32>, ptr %x, align 4
  store <4 x i32> %0, ptr %z, align 1
  ret ptr %z
}

; Under-aligned store: loads an <8 x i16> (2-byte aligned) from %x and stores
; it to %y + 3 with only 1-byte alignment, returning %y + 3.
; Expected output, little-endian: a pre-indexed vstrb.8 with writeback
; ("[r0, #3]!"). Big-endian: the vector is first byte-reversed within each
; 16-bit lane (vrev16.8), stored with vstrb.8 at [r0, #3] without writeback,
; and r0 is then advanced by a separate adds.
define ptr @strhi16_align1(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strhi16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strhi16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 1
  ret ptr %z
}

; Under-aligned truncating store: loads a <4 x i32> from %x, truncates it to
; <4 x i16>, and stores the 8-byte result to %y + 3 with only 1-byte alignment,
; returning %y + 3.
; Expected output (same for both endiannesses): the narrowed vector is written
; to an 8-byte stack slot with vstrh.32, reloaded into r1/r2 with ldrd, and
; written out with two scalar str instructions; the first str is pre-indexed
; ("[r0, #3]!") so r0 ends up holding the returned pointer.
define ptr @strhi32_align1(ptr %y, ptr %x) {
; CHECK-LABEL: strhi32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    ldrd r1, r2, [sp]
; CHECK-NEXT:    str r1, [r0, #3]!
; CHECK-NEXT:    str r2, [r0, #4]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x i32>, ptr %x, align 4
  %1 = trunc <4 x i32> %0 to <4 x i16>
  store <4 x i16> %1, ptr %z, align 1
  ret ptr %z
}

; Under-aligned floating-point store: loads a <4 x float> (4-byte aligned) from
; %x and stores it to %y + 3 with only 1-byte alignment, returning %y + 3.
; Expected output, little-endian: a pre-indexed vstrb.8 with writeback
; ("[r0, #3]!"). Big-endian: vrev32.8 on the vector, then vstrb.8 at
; [r0, #3] without writeback, then a separate adds to produce the result.
define ptr @strf32_align1(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strf32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <4 x float>, ptr %x, align 4
  store <4 x float> %0, ptr %z, align 1
  ret ptr %z
}

; Under-aligned half-precision store: loads an <8 x half> (2-byte aligned) from
; %x and stores it to %y + 3 with only 1-byte alignment, returning %y + 3.
; Expected output, little-endian: a pre-indexed vstrb.8 with writeback
; ("[r0, #3]!"). Big-endian: vrev16.8 on the vector, then vstrb.8 at
; [r0, #3] without writeback, then a separate adds to produce the result.
define ptr @strf16_align1(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strf16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #3]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vrev16.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0, #3]
; CHECK-BE-NEXT:    adds r0, #3
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %0 = load <8 x half>, ptr %x, align 2
  store <8 x half> %0, ptr %z, align 1
  ret ptr %z
}

; Over-aligned store: loads an <8 x i16> (2-byte aligned) from %x and stores it
; to %y + 16 with 8-byte alignment, returning %y + 16.
; Expected output: both runs fold the offset into a pre-indexed store with
; writeback ("[r0, #16]!"); little-endian uses vstrb.8, big-endian vstrh.16.
; NOTE(review): the function name says f16 but the IR operates on <8 x i16>,
; not <8 x half> - confirm whether that is intentional before changing it.
define ptr @strf16_align8(ptr %y, ptr %x) {
; CHECK-LE-LABEL: strf16_align8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrb.8 q0, [r0, #16]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: strf16_align8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-BE-NEXT:    vstrh.16 q0, [r0, #16]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 16
  %0 = load <8 x i16>, ptr %x, align 2
  store <8 x i16> %0, ptr %z, align 8
  ret ptr %z
}