; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s
; Load <3 x i8> (align 1) and place its elements into lanes 0-2 of a
; <16 x i8> result via a shufflevector from poison (remaining lanes undef).
define <16 x i8> @load_v3i8(ptr %src) {
; CHECK-LABEL: load_v3i8:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: umov w8, v0.h[0]
; BE-NEXT: umov w9, v0.h[1]
; BE-NEXT: fmov s0, w8
; BE-NEXT: add x8, x0, #2
; BE-NEXT: mov v0.b[1], w9
; BE-NEXT: ld1 { v0.b }[2], [x8]
; BE-NEXT: rev64 v0.16b, v0.16b
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s
}
; Load <3 x i8> (align 1), widen to <4 x i8> via shufflevector, then
; zero-extend to <4 x i32>.
define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #2]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Same as load_v3i8_to_4xi32, but the <3 x i8> load has align 2.
define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_2:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #2]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 2
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Same as load_v3i8_to_4xi32, but with align 4: the checks show a single
; 32-bit `ldr s0` is used for the load on both endiannesses.
define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_4:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_4:
; BE: // %bb.0:
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 4
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Same as load_v3i8_to_4xi32, but the <3 x i8> is loaded from %src + 1
; (constant GEP offset); checks show unscaled `ldurh` addressing.
define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #3]
; CHECK-NEXT: ldurh w9, [x0, #1]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldurh w8, [x0, #1]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #3]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%src.1 = getelementptr inbounds i8, ptr %src, i64 1
%l = load <3 x i8>, ptr %src.1, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Same as load_v3i8_to_4xi32, but the <3 x i8> is loaded from %src + 3
; (constant GEP offset).
define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #5]
; CHECK-NEXT: ldurh w9, [x0, #3]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldurh w8, [x0, #3]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #5]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%src.3 = getelementptr inbounds i8, ptr %src, i64 3
%l = load <3 x i8>, ptr %src.3, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Volatile variant of load_v3i8_to_4xi32: the checks show the LE path also
; goes through a stack temporary instead of the ldrb/ldrh/orr combine.
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: strh w8, [sp, #12]
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ldrsb w8, [x0, #2]
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: mov.h v0[1], v0[1]
; CHECK-NEXT: mov.h v0[2], w8
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: volatile_load_v3i8_to_4xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: ldrsb w8, [x0, #2]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: mov v0.h[1], v0.h[1]
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load volatile <3 x i8>, ptr %src, align 1
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
%e = zext <4 x i8> %s to <4 x i32>
ret <4 x i32> %e
}
; Plain <3 x i32> load with align 1, returned directly; checks show a
; d-register load plus a lane load for the third element.
define <3 x i32> @load_v3i32(ptr %src) {
; CHECK-LABEL: load_v3i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: add x8, x0, #8
; CHECK-NEXT: ld1.s { v0 }[2], [x8]
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i32:
; BE: // %bb.0:
; BE-NEXT: ldr d0, [x0]
; BE-NEXT: add x8, x0, #8
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ld1 { v0.s }[2], [x8]
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src, align 1
ret <3 x i32> %l
}
; Load <3 x i8> (align 1) and zero-extend it directly to <3 x i32>
; (no intermediate shufflevector, unlike load_v3i8_to_4xi32).
define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_zext_to_3xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_zext_to_3xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: add x8, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x8]
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: and v0.16b, v0.16b, v1.16b
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%e = zext <3 x i8> %l to <3 x i32>
ret <3 x i32> %e
}
; Load <3 x i8> (align 1) and sign-extend it to <3 x i32>; checks show the
; sign-extension lowered as shl #24 followed by sshr #24.
define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_sext_to_3xi32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: shl.4s v0, v0, #24
; CHECK-NEXT: sshr.4s v0, v0, #24
; CHECK-NEXT: ret
;
; BE-LABEL: load_v3i8_sext_to_3xi32:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: add x8, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x8]
; BE-NEXT: ushll v0.4s, v0.4h, #0
; BE-NEXT: shl v0.4s, v0.4s, #24
; BE-NEXT: sshr v0.4s, v0.4s, #24
; BE-NEXT: rev64 v0.4s, v0.4s
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%e = sext <3 x i8> %l to <3 x i32>
ret <3 x i32> %e
}
; Load <3 x i16> (align 1), truncate to <3 x i8>, and store it (align 1).
define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_from_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: add x9, x0, #4
; CHECK-NEXT: ld1r.4h { v0 }, [x9]
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: strb w8, [x1]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: st1.b { v1 }[2], [x8]
; CHECK-NEXT: add x8, x1, #2
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: ret
;
; BE-LABEL: store_trunc_from_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: ldrh w8, [x0, #4]
; BE-NEXT: rev32 v0.4h, v0.4h
; BE-NEXT: mov v0.h[2], w8
; BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; BE-NEXT: rev32 v0.16b, v0.16b
; BE-NEXT: str s0, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i16>, ptr %src, align 1
%t = trunc <3 x i16> %l to <3 x i8>
store <3 x i8> %t, ptr %dst, align 1
ret void
}
; Load <3 x i16> (align 1), add the constant <3, 4, 5> (loaded from a
; constant pool in the checks), truncate to <3 x i8>, and store (align 1).
define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_add_from_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: add x9, x0, #4
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI11_0@PAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: ld1.h { v0 }[2], [x9]
; CHECK-NEXT: add x9, x1, #2
; CHECK-NEXT: add.4h v0, v0, v1
; CHECK-NEXT: st1.b { v0 }[2], [x8]
; CHECK-NEXT: st1.b { v0 }[4], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x1]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
;
; BE-LABEL: store_trunc_add_from_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: add x8, x0, #4
; BE-NEXT: rev32 v0.4h, v0.4h
; BE-NEXT: ld1 { v0.h }[2], [x8]
; BE-NEXT: adrp x8, .LCPI11_0
; BE-NEXT: add x8, x8, :lo12:.LCPI11_0
; BE-NEXT: ld1 { v1.4h }, [x8]
; BE-NEXT: add v0.4h, v0.4h, v1.4h
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i16>, ptr %src, align 1
%a = add <3 x i16> %l, <i16 3, i16 4, i16 5>
%t = trunc <3 x i16> %a to <3 x i8>
store <3 x i8> %t, ptr %dst, align 1
ret void
}
; Load <3 x i8> (align 1), zero-extend to <3 x i16>, and store (align 1).
define void @load_ext_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: st1.h { v0 }[2], [x8]
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: load_ext_to_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: add x8, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x8]
; BE-NEXT: add x8, x1, #4
; BE-NEXT: bic v0.4h, #255, lsl #8
; BE-NEXT: rev32 v1.8h, v0.8h
; BE-NEXT: st1 { v0.h }[2], [x8]
; BE-NEXT: str s1, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i8>, ptr %src, align 1
%e = zext <3 x i8> %l to <3 x i16>
store <3 x i16> %e, ptr %dst, align 1
ret void
}
; Same as load_ext_to_64bits, but the <3 x i8> load uses the default (ABI)
; alignment; checks show a single `ldr s0` load on both endiannesses.
define void @load_ext_to_64bits_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_default_align:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: st1.h { v0 }[2], [x8]
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: load_ext_to_64bits_default_align:
; BE: // %bb.0: // %entry
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: add x8, x1, #4
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT: bic v0.4h, #255, lsl #8
; BE-NEXT: rev32 v1.8h, v0.8h
; BE-NEXT: st1 { v0.h }[2], [x8]
; BE-NEXT: str s1, [x1]
; BE-NEXT: ret
entry:
%l = load <3 x i8>, ptr %src
%e = zext <3 x i8> %l to <3 x i16>
store <3 x i16> %e, ptr %dst, align 1
ret void
}
; Same as load_ext_to_64bits, but the <3 x i8> load has align 4.
define void @load_ext_to_64bits_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_align_4:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: st1.h { v0 }[2], [x8]
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: load_ext_to_64bits_align_4:
; BE: // %bb.0: // %entry
; BE-NEXT: ldr s0, [x0]
; BE-NEXT: add x8, x1, #4
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT: bic v0.4h, #255, lsl #8
; BE-NEXT: rev32 v1.8h, v0.8h
; BE-NEXT: st1 { v0.h }[2], [x8]
; BE-NEXT: str s1, [x1]
; BE-NEXT: ret
entry:
%l = load <3 x i8>, ptr %src, align 4
%e = zext <3 x i8> %l to <3 x i16>
store <3 x i16> %e, ptr %dst, align 1
ret void
}
; Load <3 x i8> (align 1), zero-extend to <3 x i16>, add the constant
; <3, 4, 5> (from a constant pool in the checks), and store (align 1).
define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_add_to_64bits:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldrb w9, [x0, #2]
; CHECK-NEXT: ldrh w10, [x0]
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x8, lCPI15_0@PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF]
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: orr w9, w10, w9, lsl #16
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: add.4h v0, v0, v1
; CHECK-NEXT: st1.h { v0 }[2], [x8]
; CHECK-NEXT: str s0, [x1]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
;
; BE-LABEL: load_ext_add_to_64bits:
; BE: // %bb.0: // %entry
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w8, [x0]
; BE-NEXT: strh w8, [sp, #12]
; BE-NEXT: add x8, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x8]
; BE-NEXT: adrp x8, .LCPI15_0
; BE-NEXT: add x8, x8, :lo12:.LCPI15_0
; BE-NEXT: ld1 { v1.4h }, [x8]
; BE-NEXT: add x8, x1, #4
; BE-NEXT: bic v0.4h, #255, lsl #8
; BE-NEXT: add v0.4h, v0.4h, v1.4h
; BE-NEXT: rev32 v1.8h, v0.8h
; BE-NEXT: st1 { v0.h }[2], [x8]
; BE-NEXT: str s1, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
entry:
%l = load <3 x i8>, ptr %src, align 1
%e = zext <3 x i8> %l to <3 x i16>
%a = add <3 x i16> %e, <i16 3, i16 4, i16 5>
store <3 x i16> %a, ptr %dst, align 1
ret void
}
; Load <3 x i32>, lshr each lane by 16, truncate to <3 x i8>, and store
; (align 1); LE checks show per-byte lane stores, BE a stack round-trip.
define void @shift_trunc_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: add x9, x1, #2
; CHECK-NEXT: ushr.4s v0, v0, #16
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: st1.b { v0 }[8], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store <3 x i8> %t, ptr %dst, align 1
ret void
}
; Same as shift_trunc_store, but the <3 x i8> store uses the default (ABI)
; alignment.
define void @shift_trunc_store_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_default_align:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: add x9, x1, #2
; CHECK-NEXT: ushr.4s v0, v0, #16
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: st1.b { v0 }[8], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store_default_align:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store <3 x i8> %t, ptr %dst
ret void
}
; Same as shift_trunc_store, but the <3 x i8> store has align 4.
define void @shift_trunc_store_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_align_4:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: add x9, x1, #2
; CHECK-NEXT: ushr.4s v0, v0, #16
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: st1.b { v0 }[8], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x1]
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store_align_4:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store <3 x i8> %t, ptr %dst, align 4
ret void
}
; Same as shift_trunc_store, but the <3 x i8> is stored at %dst + 1
; (constant GEP offset); checks show unscaled `sturh` addressing on BE.
define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_1:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add x8, x1, #2
; CHECK-NEXT: add x9, x1, #3
; CHECK-NEXT: ushr.4s v0, v0, #16
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: add x8, x1, #1
; CHECK-NEXT: st1.b { v0 }[8], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x8]
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store_const_offset_1:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #3]
; BE-NEXT: sturh w9, [x1, #1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
%dst.1 = getelementptr inbounds i8, ptr %dst, i64 1
store <3 x i8> %t, ptr %dst.1, align 1
ret void
}
; Same as shift_trunc_store, but the <3 x i8> is stored at %dst + 3
; (constant GEP offset).
define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_3:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: add x8, x1, #4
; CHECK-NEXT: add x9, x1, #5
; CHECK-NEXT: ushr.4s v0, v0, #16
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: add x8, x1, #3
; CHECK-NEXT: st1.b { v0 }[8], [x9]
; CHECK-NEXT: st1.b { v0 }[0], [x8]
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_store_const_offset_3:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #5]
; BE-NEXT: sturh w9, [x1, #3]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
%dst.3 = getelementptr inbounds i8, ptr %dst, i64 3
store <3 x i8> %t, ptr %dst.3, align 1
ret void
}
; Volatile-store variant of shift_trunc_store: checks show the LE path also
; uses the stack round-trip (strh + strb) rather than per-lane st1 stores.
define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_volatile_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: shrn.4h v0, v0, #16
; CHECK-NEXT: uzp1.8b v1, v0, v0
; CHECK-NEXT: umov.h w8, v0[2]
; CHECK-NEXT: str s1, [sp, #12]
; CHECK-NEXT: ldrh w9, [sp, #12]
; CHECK-NEXT: strb w8, [x1, #2]
; CHECK-NEXT: strh w9, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
;
; BE-LABEL: shift_trunc_volatile_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ld1 { v0.4s }, [x0]
; BE-NEXT: shrn v0.4h, v0.4s, #16
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #12]
; BE-NEXT: ldrh w9, [sp, #12]
; BE-NEXT: strb w8, [x1, #2]
; BE-NEXT: strh w9, [x1]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i32>, ptr %src
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
%t = trunc <3 x i32> %s to <3 x i8>
store volatile <3 x i8> %t, ptr %dst, align 1
ret void
}
; Round-trip through the same pointer: load <3 x i8>, zext to <3 x i32>,
; add <1, 2, 3>, truncate back to <3 x i8>, and store over %src.
define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w9, [x0, #2]
; CHECK-NEXT: ldrh w10, [x0]
; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
; CHECK-NEXT: add x8, x0, #2
; CHECK-NEXT: orr w9, w10, w9, lsl #16
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: uaddw.4s v0, v1, v0
; CHECK-NEXT: st1.b { v0 }[8], [x8]
; CHECK-NEXT: add x8, x0, #1
; CHECK-NEXT: st1.b { v0 }[0], [x0]
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
;
; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w9, [x0]
; BE-NEXT: adrp x8, .LCPI22_0
; BE-NEXT: add x8, x8, :lo12:.LCPI22_0
; BE-NEXT: ld1 { v1.4h }, [x8]
; BE-NEXT: strh w9, [sp, #12]
; BE-NEXT: add x9, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x9]
; BE-NEXT: add v0.4h, v0.4h, v1.4h
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #8]
; BE-NEXT: ldrh w9, [sp, #8]
; BE-NEXT: strb w8, [x0, #2]
; BE-NEXT: strh w9, [x0]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%e = zext <3 x i8> %l to <3 x i32>
%add = add <3 x i32> %e, <i32 1, i32 2, i32 3>
%t = trunc <3 x i32> %add to <3 x i8>
store <3 x i8> %t, ptr %src
ret void
}
; Like load_v3i8_zext_to_3xi32_add_trunc_store but with sext: the truncate
; discards the extended bits, so the checks show the same lowering as zext.
define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {
; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldrb w9, [x0, #2]
; CHECK-NEXT: ldrh w10, [x0]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
; CHECK-NEXT: add x8, x0, #2
; CHECK-NEXT: orr w9, w10, w9, lsl #16
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: zip1.8b v0, v0, v0
; CHECK-NEXT: uaddw.4s v0, v1, v0
; CHECK-NEXT: st1.b { v0 }[8], [x8]
; CHECK-NEXT: add x8, x0, #1
; CHECK-NEXT: st1.b { v0 }[0], [x0]
; CHECK-NEXT: st1.b { v0 }[4], [x8]
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
;
; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
; BE: // %bb.0:
; BE-NEXT: sub sp, sp, #16
; BE-NEXT: .cfi_def_cfa_offset 16
; BE-NEXT: ldrh w9, [x0]
; BE-NEXT: adrp x8, .LCPI23_0
; BE-NEXT: add x8, x8, :lo12:.LCPI23_0
; BE-NEXT: ld1 { v1.4h }, [x8]
; BE-NEXT: strh w9, [sp, #12]
; BE-NEXT: add x9, x0, #2
; BE-NEXT: ldr s0, [sp, #12]
; BE-NEXT: rev32 v0.8b, v0.8b
; BE-NEXT: ushll v0.8h, v0.8b, #0
; BE-NEXT: ld1 { v0.b }[4], [x9]
; BE-NEXT: add v0.4h, v0.4h, v1.4h
; BE-NEXT: uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT: umov w8, v0.h[2]
; BE-NEXT: rev32 v1.16b, v1.16b
; BE-NEXT: str s1, [sp, #8]
; BE-NEXT: ldrh w9, [sp, #8]
; BE-NEXT: strb w8, [x0, #2]
; BE-NEXT: strh w9, [x0]
; BE-NEXT: add sp, sp, #16
; BE-NEXT: ret
%l = load <3 x i8>, ptr %src, align 1
%e = sext <3 x i8> %l to <3 x i32>
%add = add <3 x i32> %e, <i32 1, i32 2, i32 3>
%t = trunc <3 x i32> %add to <3 x i8>
store <3 x i8> %t, ptr %src
ret void
}