; llvm/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-LE
; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE

; test0: load a single i16 from %i16_ptr and insert it into element 0 of a
; <2 x i16> whose remaining element stays undef. %inc is not used by the body.
; Expected output is one halfword lane load into lane 0; the big-endian
; expectation additionally ends with a rev64 on the 2s arrangement.
; NOTE(review): the base vector is `undef`; switching to `poison` may be
; preferable, but verify the generated checks are unchanged before doing so.
define <2 x i16> @test0(ptr %i16_ptr, i64 %inc) {
; CHECK-LE-LABEL: test0:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: test0:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  ; Scalar load feeding element 0 only.
  %i_0 = load i16, ptr %i16_ptr
  %v0 = insertelement <2 x i16> undef, i16 %i_0, i32 0
  ret <2 x i16> %v0
}

; test1: load a whole <2 x i16> vector from %v2i16_ptr and return it.
; Expected output is two halfword lane loads: lane 0 from [x0] and lane 2 from
; x0 + 2. The big-endian expectation additionally ends with rev64 v0.2s.
; NOTE(review): the use of h-lanes 0 and 2 presumably reflects <2 x i16> being
; held as two 32-bit elements - confirm against the AArch64 type legalization.
define <2 x i16> @test1(ptr %v2i16_ptr) {
; CHECK-LE-LABEL: test1:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-LE-NEXT:    add x8, x0, #2
; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: test1:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-BE-NEXT:    add x8, x0, #2
; CHECK-BE-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  %v2i16 = load <2 x i16>, ptr %v2i16_ptr
  ret <2 x i16> %v2i16
}

; test2: build a <2 x i16> from two independent scalar i16 loads: element 0
; from %i16_ptr and element 1 from %i16_ptr advanced by %inc i16 elements.
; Expected output computes the second address as x0 + (x1 << 1) and uses two
; halfword lane loads (lanes 0 and 2); big-endian adds a trailing rev64 v0.2s.
define <2 x i16> @test2(ptr %i16_ptr, i64 %inc) {
; CHECK-LE-LABEL: test2:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-LE-NEXT:    add x8, x0, x1, lsl #1
; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: test2:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-BE-NEXT:    add x8, x0, x1, lsl #1
; CHECK-BE-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  ; First element: loaded directly from the incoming pointer.
  %i_0 = load i16, ptr %i16_ptr
  ; Second element: the GEP indexes in units of i16, so %inc is an element count.
  %i16_ptr_inc = getelementptr i16, ptr %i16_ptr, i64 %inc
  %i_1 = load i16, ptr %i16_ptr_inc
  ; Assemble the vector starting from an undef base so both lanes are defined
  ; only by the two loads above.
  %v0 = insertelement <2 x i16> undef, i16 %i_0, i32 0
  %v1 = insertelement <2 x i16> %v0, i16 %i_1, i32 1
  ret <2 x i16> %v1
}

; test3: load a whole <2 x i8> vector from %v2i8_ptr and return it.
; Expected output is two byte lane loads: lane 0 from [x0] and lane 4 from
; x0 + 1. The big-endian expectation additionally ends with rev64 v0.2s.
define <2 x i8> @test3(ptr %v2i8_ptr) {
; CHECK-LE-LABEL: test3:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-LE-NEXT:    add x8, x0, #1
; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: test3:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-BE-NEXT:    add x8, x0, #1
; CHECK-BE-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  %v2i8 = load <2 x i8>, ptr %v2i8_ptr
  ret <2 x i8> %v2i8
}

; test4: load a whole <4 x i8> vector from %v4i8_ptr (no explicit alignment)
; and return it.
; Expected output is a single 32-bit `ldr s0` followed by a ushll that widens
; the bytes to halfwords. The big-endian expectation wraps the ushll with a
; rev32 on the bytes before it and a rev64 on the halfwords after it.
define <4 x i8> @test4(ptr %v4i8_ptr) {
; CHECK-LE-LABEL: test4:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: test4:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %v4i8 = load <4 x i8>, ptr %v4i8_ptr
  ret <4 x i8> %v4i8
}

; fsext_v2i32: load <2 x i8> from %a and sign-extend it to <2 x i32>.
; Expected output performs two scalar sign-extending byte loads (ldrsb from
; [x0] and [x0, #1]) and inserts them into s-lanes 0 and 1 via fmov/mov.
; The big-endian expectation additionally ends with rev64 v0.2s.
define <2 x i32> @fsext_v2i32(ptr %a) {
; CHECK-LE-LABEL: fsext_v2i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldrsb w8, [x0]
; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
; CHECK-LE-NEXT:    fmov s0, w8
; CHECK-LE-NEXT:    mov v0.s[1], w9
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v2i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldrsb w8, [x0]
; CHECK-BE-NEXT:    ldrsb w9, [x0, #1]
; CHECK-BE-NEXT:    fmov s0, w8
; CHECK-BE-NEXT:    mov v0.s[1], w9
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  %x = load <2 x i8>, ptr %a
  %y = sext <2 x i8> %x to <2 x i32>
  ret <2 x i32> %y
}

; fsext_v3i32: load <3 x i8> from %a and sign-extend it to <3 x i32>
; (a non-power-of-two element count).
; Expected output loads 32 bits with `ldr s0`, spreads the bytes with zip1,
; widens with ushll, then sign-extends from 8 bits using a shl/sshr pair by 24.
; The big-endian expectation adds a rev32 after the load and a rev64 + ext
; pair before returning.
define <3 x i32> @fsext_v3i32(ptr %a) {
; CHECK-LE-LABEL: fsext_v3i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-LE-NEXT:    shl v0.4s, v0.4s, #24
; CHECK-LE-NEXT:    sshr v0.4s, v0.4s, #24
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v3i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT:    shl v0.4s, v0.4s, #24
; CHECK-BE-NEXT:    sshr v0.4s, v0.4s, #24
; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <3 x i8>, ptr %a
  %y = sext <3 x i8> %x to <3 x i32>
  ret <3 x i32> %y
}

; fsext_v4i32: load <4 x i8> from %a and sign-extend it to <4 x i32>.
; Expected output is a single `ldr s0` followed by two sshll steps
; (bytes -> halfwords -> words). The big-endian expectation adds a rev32
; after the load and a rev64 + ext pair before returning.
define <4 x i32> @fsext_v4i32(ptr %a) {
; CHECK-LE-LABEL: fsext_v4i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v4i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a
  %y = sext <4 x i8> %x to <4 x i32>
  ret <4 x i32> %y
}

; fsext_v8i32: load <8 x i8> from %a and sign-extend it to <8 x i32>.
; The expected output produces the result in two vector registers (v0 and v1):
; a 64-bit load, one sshll to halfwords, then sshll/sshll2 for the low and high
; halves. The big-endian expectation loads with `ld1 { v0.8b }` instead of
; `ldr d0` and applies a rev64 + ext pair to each half before returning.
define <8 x i32> @fsext_v8i32(ptr %a) {
; CHECK-LE-LABEL: fsext_v8i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr d0, [x0]
; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    sshll2 v1.4s, v0.8h, #0
; CHECK-LE-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v8i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.8b }, [x0]
; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-BE-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
; CHECK-BE-NEXT:    rev64 v2.4s, v1.4s
; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ext v0.16b, v2.16b, v2.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <8 x i8>, ptr %a
  %y = sext <8 x i8> %x to <8 x i32>
  ret <8 x i32> %y
}

; fzext_v4i32: load <4 x i8> from %a and zero-extend it to <4 x i32>.
; Zero-extending counterpart of fsext_v4i32: the expected output is a single
; `ldr s0` followed by two ushll steps (bytes -> halfwords -> words). The
; big-endian expectation adds a rev32 after the load and a rev64 + ext pair
; before returning.
define <4 x i32> @fzext_v4i32(ptr %a) {
; CHECK-LE-LABEL: fzext_v4i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fzext_v4i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a
  %y = zext <4 x i8> %x to <4 x i32>
  ret <4 x i32> %y
}

; TODO: This codegen could just be:
;   ldrb w0, [x0]
;
; loadExti32: load <4 x i8> from %ref, extract element 0 and zero-extend it
; to a scalar i32. The expectations below record the current, longer sequence
; (vector load, ushll, umov of h-lane 0, then a mask with 0xff); big-endian
; additionally has a rev32 after the load.
define i32 @loadExti32(ptr %ref) {
; CHECK-LE-LABEL: loadExti32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    umov w8, v0.h[0]
; CHECK-LE-NEXT:    and w0, w8, #0xff
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: loadExti32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    umov w8, v0.h[0]
; CHECK-BE-NEXT:    and w0, w8, #0xff
; CHECK-BE-NEXT:    ret
  %a = load <4 x i8>, ptr %ref
  ; Only element 0 of the loaded vector is consumed.
  %vecext = extractelement <4 x i8> %a, i32 0
  %conv = zext i8 %vecext to i32
  ret i32 %conv
}

; fsext_v2i16: load <2 x i8> from %a and sign-extend it to <2 x i16>.
; The expected instruction sequence matches the one recorded for fsext_v2i32:
; two scalar ldrsb loads inserted into s-lanes 0 and 1 via fmov/mov, with a
; trailing rev64 v0.2s in the big-endian expectation.
define <2 x i16> @fsext_v2i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v2i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldrsb w8, [x0]
; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
; CHECK-LE-NEXT:    fmov s0, w8
; CHECK-LE-NEXT:    mov v0.s[1], w9
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v2i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldrsb w8, [x0]
; CHECK-BE-NEXT:    ldrsb w9, [x0, #1]
; CHECK-BE-NEXT:    fmov s0, w8
; CHECK-BE-NEXT:    mov v0.s[1], w9
; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
; CHECK-BE-NEXT:    ret
  %x = load <2 x i8>, ptr %a
  %y = sext <2 x i8> %x to <2 x i16>
  ret <2 x i16> %y
}

; fsext_v3i16: load <3 x i8> from %a and sign-extend it to <3 x i16>
; (a non-power-of-two element count).
; Expected output loads 32 bits with `ldr s0`, spreads the bytes into halfword
; lanes with zip1, then sign-extends from 8 bits using a shl/sshr pair by 8.
; The big-endian expectation adds a rev32 after the load and a trailing
; rev64 v0.4h.
define <3 x i16> @fsext_v3i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v3i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-LE-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-LE-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v3i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %x = load <3 x i8>, ptr %a
  %y = sext <3 x i8> %x to <3 x i16>
  ret <3 x i16> %y
}

; fsext_v4i16: load <4 x i8> from %a and sign-extend it to <4 x i16>.
; Expected output is a single `ldr s0` followed by one sshll (bytes ->
; halfwords). The big-endian expectation adds a rev32 after the load and a
; trailing rev64 v0.4h.
define <4 x i16> @fsext_v4i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v4i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v4i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a
  %y = sext <4 x i8> %x to <4 x i16>
  ret <4 x i16> %y
}

; fsext_v8i16: load <8 x i8> from %a and sign-extend it to <8 x i16>.
; Expected output is a 64-bit load followed by one sshll. The big-endian
; expectation loads with `ld1 { v0.8b }` instead of `ldr d0` and adds a
; rev64 + ext pair before returning.
define <8 x i16> @fsext_v8i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v8i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr d0, [x0]
; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v8i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.8b }, [x0]
; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <8 x i8>, ptr %a
  %y = sext <8 x i8> %x to <8 x i16>
  ret <8 x i16> %y
}

; fsext_v16i16: load <16 x i8> from %a and sign-extend it to <16 x i16>.
; The expected output produces the result in two vector registers (v0 and v1):
; a 128-bit load, then sshll/sshll2 for the low and high halves. The
; big-endian expectation loads with `ld1 { v0.16b }` instead of `ldr q0` and
; applies a rev64 + ext pair to each half before returning.
define <16 x i16> @fsext_v16i16(ptr %a) {
; CHECK-LE-LABEL: fsext_v16i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr q0, [x0]
; CHECK-LE-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fsext_v16i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.16b }, [x0]
; CHECK-BE-NEXT:    sshll v1.8h, v0.8b, #0
; CHECK-BE-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
; CHECK-BE-NEXT:    rev64 v2.8h, v1.8h
; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ext v0.16b, v2.16b, v2.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <16 x i8>, ptr %a
  %y = sext <16 x i8> %x to <16 x i16>
  ret <16 x i16> %y
}

; fzext_v4i16: load <4 x i8> from %a and zero-extend it to <4 x i16>.
; Zero-extending counterpart of fsext_v4i16: the expected output is a single
; `ldr s0` followed by one ushll. The big-endian expectation adds a rev32
; after the load and a trailing rev64 v0.4h.
define <4 x i16> @fzext_v4i16(ptr %a) {
; CHECK-LE-LABEL: fzext_v4i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: fzext_v4i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a
  %y = zext <4 x i8> %x to <4 x i16>
  ret <4 x i16> %y
}

; anyext_v4i16: load two 4-byte-aligned <4 x i8> vectors from %a and %b, add
; them at i8 width, and sign-extend the sum to <4 x i16>.
; Expected output loads each operand with `ldr s`, combines them with a
; widening uaddl, then sign-extends from 8 bits using a shl/sshr pair by 8.
; The big-endian expectation adds a rev32 on each loaded operand and a
; trailing rev64 v0.4h.
; NOTE(review): the name suggests the point is that the loaded operands only
; need an any-extend because the shl/sshr discards the high bits - confirm.
define <4 x i16> @anyext_v4i16(ptr %a, ptr %b) {
; CHECK-LE-LABEL: anyext_v4i16:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ldr s1, [x1]
; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-LE-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-LE-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: anyext_v4i16:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    ldr s1, [x1]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-BE-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-BE-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a, align 4
  %y = load <4 x i8>, ptr %b, align 4
  ; The add is performed on i8 elements; the extension happens afterwards.
  %z = add <4 x i8> %x, %y
  %s = sext <4 x i8> %z to <4 x i16>
  ret <4 x i16> %s
}

; anyext_v4i32: load two 4-byte-aligned <4 x i8> vectors from %a and %b, add
; them at i8 width, and sign-extend the sum to <4 x i32>.
; Expected output loads each operand with `ldr s`, combines them with a
; widening uaddl, widens again with ushll, then sign-extends from 8 bits using
; a shl/sshr pair by 24. The big-endian expectation adds a rev32 on each
; loaded operand and a rev64 + ext pair before returning.
; NOTE(review): the name suggests the point is that the loaded operands only
; need an any-extend because the shl/sshr discards the high bits - confirm.
define <4 x i32> @anyext_v4i32(ptr %a, ptr %b) {
; CHECK-LE-LABEL: anyext_v4i32:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ldr s1, [x1]
; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-LE-NEXT:    shl v0.4s, v0.4s, #24
; CHECK-LE-NEXT:    sshr v0.4s, v0.4s, #24
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: anyext_v4i32:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    ldr s1, [x1]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT:    shl v0.4s, v0.4s, #24
; CHECK-BE-NEXT:    sshr v0.4s, v0.4s, #24
; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT:    ret
  %x = load <4 x i8>, ptr %a, align 4
  %y = load <4 x i8>, ptr %b, align 4
  ; The add is performed on i8 elements; the extension happens afterwards.
  %z = add <4 x i8> %x, %y
  %s = sext <4 x i8> %z to <4 x i32>
  ret <4 x i32> %s
}

; bitcast: reinterpret a scalar i32 argument as <4 x i8> and return it (no
; memory access involved).
; Expected output moves w0 into s0 with fmov and spreads the bytes with zip1.
; The big-endian expectation adds a rev32 on the bytes before the zip1 and a
; trailing rev64 v0.4h.
define <4 x i8> @bitcast(i32 %0) {
; CHECK-LE-LABEL: bitcast:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    fmov s0, w0
; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: bitcast:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    fmov s0, w0
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %2 = bitcast i32 %0 to <4 x i8>
  ret <4 x i8> %2
}

; strict_align_aligned: load <4 x i8> with `align 4` in a function carrying
; the "+strict-align" target feature.
; With the 4-byte alignment stated on the load, the expected output is still a
; single `ldr s0` followed by ushll - the same sequence recorded for test4.
; The big-endian expectation adds a rev32 after the load and a trailing
; rev64 v0.4h.
define <4 x i8> @strict_align_aligned(ptr %v4i8_ptr) "target-features"="+strict-align" {
; CHECK-LE-LABEL: strict_align_aligned:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ldr s0, [x0]
; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: strict_align_aligned:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ldr s0, [x0]
; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %v4i8 = load <4 x i8>, ptr %v4i8_ptr, align 4
  ret <4 x i8> %v4i8
}

; strict_align_unaligned: load <4 x i8> with only `align 1` in a function
; carrying the "+strict-align" target feature.
; Unlike strict_align_aligned, the expected output does not use a single
; 32-bit load: it issues four byte lane loads from x0, x0+1, x0+2 and x0+3
; into b-lanes 0, 2, 4 and 6. The big-endian expectation additionally ends
; with rev64 v0.4h.
define <4 x i8> @strict_align_unaligned(ptr %v4i8_ptr) "target-features"="+strict-align" {
; CHECK-LE-LABEL: strict_align_unaligned:
; CHECK-LE:       // %bb.0:
; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-LE-NEXT:    add x8, x0, #1
; CHECK-LE-NEXT:    ld1 { v0.b }[2], [x8]
; CHECK-LE-NEXT:    add x8, x0, #2
; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-LE-NEXT:    add x8, x0, #3
; CHECK-LE-NEXT:    ld1 { v0.b }[6], [x8]
; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT:    ret
;
; CHECK-BE-LABEL: strict_align_unaligned:
; CHECK-BE:       // %bb.0:
; CHECK-BE-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-BE-NEXT:    add x8, x0, #1
; CHECK-BE-NEXT:    ld1 { v0.b }[2], [x8]
; CHECK-BE-NEXT:    add x8, x0, #2
; CHECK-BE-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-BE-NEXT:    add x8, x0, #3
; CHECK-BE-NEXT:    ld1 { v0.b }[6], [x8]
; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
; CHECK-BE-NEXT:    ret
  %v4i8 = load <4 x i8>, ptr %v4i8_ptr, align 1
  ret <4 x i8> %v4i8
}