; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s
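; llrint rounds to the nearest integer using the current rounding mode and
; converts the result to i64, which maps onto frintx followed by fcvtzs on
; AArch64. With -aarch64-sve-vector-bits-min=256, fixed-length vectors of up
; to 256 bits may be lowered using SVE registers and instructions.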
define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
; CHECK-LABEL: llrint_v1i64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx h0, h0
; CHECK-NEXT: fcvtzs x8, h0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
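; The two f16 lanes are rounded and converted individually: there is no
; f16-to-i64 vector convert, so each lane goes through scalar frintx/fcvtzs
; before being repacked into the result vector with fmov/ins.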
define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
; CHECK-LABEL: llrint_v2i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: frintx h0, h0
; CHECK-NEXT: frintx h1, h1
; CHECK-NEXT: fcvtzs x8, h0
; CHECK-NEXT: fcvtzs x9, h1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
; CHECK-LABEL: llrint_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: mov h2, v0.h[3]
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: fcvtzs x9, h0
; CHECK-NEXT: fcvtzs x8, h1
; CHECK-NEXT: fcvtzs x10, h2
; CHECK-NEXT: fcvtzs x11, h3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
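; Eight f16 lanes are split into two 64-bit halves with ext and each half is
; rounded as a 4h vector, but the eight lane conversions to i64 are still
; scalar fcvtzs into general-purpose registers.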
define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
; CHECK-LABEL: llrint_v8i64_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: frintx v1.4h, v1.4h
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h2, v0.h[1]
; CHECK-NEXT: mov h7, v0.h[3]
; CHECK-NEXT: fcvtzs x8, h0
; CHECK-NEXT: mov h3, v1.h[2]
; CHECK-NEXT: mov h5, v1.h[3]
; CHECK-NEXT: mov h6, v1.h[1]
; CHECK-NEXT: fcvtzs x11, h1
; CHECK-NEXT: fcvtzs x12, h4
; CHECK-NEXT: fcvtzs x9, h2
; CHECK-NEXT: fcvtzs x15, h7
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fcvtzs x10, h3
; CHECK-NEXT: fcvtzs x13, h5
; CHECK-NEXT: fcvtzs x14, h6
; CHECK-NEXT: fmov d1, x12
; CHECK-NEXT: fmov d2, x11
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: fmov d3, x10
; CHECK-NEXT: mov v1.d[1], x15
; CHECK-NEXT: mov v2.d[1], x14
; CHECK-NEXT: mov v3.d[1], x13
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
; CHECK-LABEL: llrint_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: frintx v1.4h, v1.4h
; CHECK-NEXT: frintx v3.4h, v0.4h
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: frintx v2.4h, v2.4h
; CHECK-NEXT: mov h4, v1.h[2]
; CHECK-NEXT: mov h5, v3.h[2]
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: mov h6, v3.h[1]
; CHECK-NEXT: fcvtzs x9, h3
; CHECK-NEXT: mov h16, v1.h[1]
; CHECK-NEXT: fcvtzs x12, h1
; CHECK-NEXT: mov h3, v3.h[3]
; CHECK-NEXT: mov h17, v1.h[3]
; CHECK-NEXT: mov h7, v2.h[3]
; CHECK-NEXT: fcvtzs x8, h4
; CHECK-NEXT: fcvtzs x10, h5
; CHECK-NEXT: mov h4, v2.h[2]
; CHECK-NEXT: mov h5, v0.h[2]
; CHECK-NEXT: fcvtzs x11, h6
; CHECK-NEXT: mov h6, v0.h[3]
; CHECK-NEXT: fcvtzs x15, h2
; CHECK-NEXT: mov h2, v2.h[1]
; CHECK-NEXT: fcvtzs x14, h0
; CHECK-NEXT: fcvtzs x17, h3
; CHECK-NEXT: fcvtzs x0, h17
; CHECK-NEXT: fcvtzs x13, h7
; CHECK-NEXT: mov h7, v0.h[1]
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fcvtzs x16, h4
; CHECK-NEXT: fcvtzs x9, h5
; CHECK-NEXT: fmov d4, x12
; CHECK-NEXT: fcvtzs x12, h16
; CHECK-NEXT: fmov d1, x10
; CHECK-NEXT: fcvtzs x10, h6
; CHECK-NEXT: fmov d5, x8
; CHECK-NEXT: fcvtzs x8, h2
; CHECK-NEXT: fmov d2, x14
; CHECK-NEXT: fcvtzs x18, h7
; CHECK-NEXT: fmov d6, x15
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: fmov d3, x9
; CHECK-NEXT: fmov d7, x16
; CHECK-NEXT: mov v1.d[1], x17
; CHECK-NEXT: mov v4.d[1], x12
; CHECK-NEXT: mov v5.d[1], x0
; CHECK-NEXT: mov v6.d[1], x8
; CHECK-NEXT: mov v2.d[1], x18
; CHECK-NEXT: mov v3.d[1], x10
; CHECK-NEXT: mov v7.d[1], x13
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
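; <32 x i64> is returned indirectly: per the AAPCS64 the caller passes the
; result address in x8. The scalar conversion results are spilled to a
; 32-byte-aligned stack slot with stp, reloaded four lanes at a time as SVE
; data (ld1d under a vl4 predicate), and stored out through x8 with st1d.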
define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
; CHECK-LABEL: llrint_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: frintx v5.4h, v0.4h
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: ext v17.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: frintx v1.4h, v1.4h
; CHECK-NEXT: frintx v2.4h, v2.4h
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov h6, v5.h[3]
; CHECK-NEXT: frintx v0.4h, v0.4h
; CHECK-NEXT: mov h7, v5.h[2]
; CHECK-NEXT: mov h16, v5.h[1]
; CHECK-NEXT: frintx v4.4h, v4.4h
; CHECK-NEXT: fcvtzs x12, h5
; CHECK-NEXT: ext v5.16b, v3.16b, v3.16b, #8
; CHECK-NEXT: frintx v17.4h, v17.4h
; CHECK-NEXT: frintx v3.4h, v3.4h
; CHECK-NEXT: fcvtzs x9, h6
; CHECK-NEXT: mov h6, v0.h[3]
; CHECK-NEXT: fcvtzs x10, h7
; CHECK-NEXT: mov h7, v0.h[2]
; CHECK-NEXT: fcvtzs x11, h16
; CHECK-NEXT: mov h16, v0.h[1]
; CHECK-NEXT: fcvtzs x13, h6
; CHECK-NEXT: mov h6, v4.h[3]
; CHECK-NEXT: stp x10, x9, [sp, #48]
; CHECK-NEXT: fcvtzs x9, h7
; CHECK-NEXT: mov h7, v4.h[2]
; CHECK-NEXT: fcvtzs x10, h16
; CHECK-NEXT: mov h16, v4.h[1]
; CHECK-NEXT: stp x12, x11, [sp, #32]
; CHECK-NEXT: fcvtzs x11, h0
; CHECK-NEXT: frintx v0.4h, v5.4h
; CHECK-NEXT: mov h5, v17.h[3]
; CHECK-NEXT: fcvtzs x12, h6
; CHECK-NEXT: mov h6, v17.h[2]
; CHECK-NEXT: stp x9, x13, [sp, #16]
; CHECK-NEXT: fcvtzs x13, h7
; CHECK-NEXT: mov h7, v17.h[1]
; CHECK-NEXT: fcvtzs x9, h16
; CHECK-NEXT: stp x11, x10, [sp]
; CHECK-NEXT: fcvtzs x10, h4
; CHECK-NEXT: fcvtzs x11, h5
; CHECK-NEXT: mov h4, v0.h[3]
; CHECK-NEXT: mov h5, v0.h[2]
; CHECK-NEXT: stp x13, x12, [sp, #80]
; CHECK-NEXT: fcvtzs x12, h6
; CHECK-NEXT: fcvtzs x13, h7
; CHECK-NEXT: mov h6, v0.h[1]
; CHECK-NEXT: stp x10, x9, [sp, #64]
; CHECK-NEXT: fcvtzs x9, h17
; CHECK-NEXT: mov h7, v1.h[3]
; CHECK-NEXT: fcvtzs x10, h4
; CHECK-NEXT: mov h4, v1.h[2]
; CHECK-NEXT: stp x12, x11, [sp, #144]
; CHECK-NEXT: fcvtzs x11, h5
; CHECK-NEXT: mov h5, v1.h[1]
; CHECK-NEXT: fcvtzs x12, h6
; CHECK-NEXT: stp x9, x13, [sp, #128]
; CHECK-NEXT: fcvtzs x9, h0
; CHECK-NEXT: fcvtzs x13, h7
; CHECK-NEXT: mov h0, v2.h[3]
; CHECK-NEXT: stp x11, x10, [sp, #208]
; CHECK-NEXT: fcvtzs x10, h4
; CHECK-NEXT: mov h4, v2.h[2]
; CHECK-NEXT: fcvtzs x11, h5
; CHECK-NEXT: mov h5, v2.h[1]
; CHECK-NEXT: stp x9, x12, [sp, #192]
; CHECK-NEXT: fcvtzs x9, h1
; CHECK-NEXT: fcvtzs x12, h0
; CHECK-NEXT: mov h0, v3.h[3]
; CHECK-NEXT: mov h1, v3.h[2]
; CHECK-NEXT: stp x10, x13, [sp, #112]
; CHECK-NEXT: fcvtzs x10, h4
; CHECK-NEXT: mov h4, v3.h[1]
; CHECK-NEXT: fcvtzs x13, h5
; CHECK-NEXT: stp x9, x11, [sp, #96]
; CHECK-NEXT: fcvtzs x9, h2
; CHECK-NEXT: fcvtzs x11, h0
; CHECK-NEXT: stp x10, x12, [sp, #176]
; CHECK-NEXT: fcvtzs x10, h1
; CHECK-NEXT: fcvtzs x12, h4
; CHECK-NEXT: stp x9, x13, [sp, #160]
; CHECK-NEXT: fcvtzs x9, h3
; CHECK-NEXT: stp x10, x11, [sp, #240]
; CHECK-NEXT: add x10, sp, #64
; CHECK-NEXT: stp x9, x12, [sp, #224]
; CHECK-NEXT: add x9, sp, #32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9]
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #224
; CHECK-NEXT: add x10, sp, #128
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #160
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10]
; CHECK-NEXT: add x10, sp, #96
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #192
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10]
; CHECK-NEXT: mov x10, #24 // =0x18
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9]
; CHECK-NEXT: mov x9, #16 // =0x10
; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3]
; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #8 // =0x8
; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #28 // =0x1c
; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #20 // =0x14
; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #12 // =0xc
; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; CHECK-LABEL: llrint_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: frintx s0, s0
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
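; The two f32 lanes stay on the vector path: round with frintx, widen to f64
; with fcvtl, then use the vector fcvtzs that exists for 2d elements.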
define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; CHECK-LABEL: llrint_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2s, v0.2s
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; CHECK-LABEL: llrint_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: mov s1, v0.s[2]
; CHECK-NEXT: mov s2, v0.s[3]
; CHECK-NEXT: mov s3, v0.s[1]
; CHECK-NEXT: fcvtzs x9, s0
; CHECK-NEXT: fcvtzs x8, s1
; CHECK-NEXT: fcvtzs x10, s2
; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; CHECK-LABEL: llrint_v8i64_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: frintx v1.4s, v1.4s
; CHECK-NEXT: mov s3, v1.s[2]
; CHECK-NEXT: mov s4, v0.s[2]
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: mov s5, v1.s[3]
; CHECK-NEXT: mov s6, v1.s[1]
; CHECK-NEXT: mov s7, v0.s[3]
; CHECK-NEXT: fcvtzs x8, s0
; CHECK-NEXT: fcvtzs x10, s1
; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: fcvtzs x12, s4
; CHECK-NEXT: fcvtzs x9, s2
; CHECK-NEXT: fcvtzs x13, s5
; CHECK-NEXT: fcvtzs x14, s6
; CHECK-NEXT: fcvtzs x15, s7
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d2, x10
; CHECK-NEXT: fmov d1, x12
; CHECK-NEXT: fmov d3, x11
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: mov v2.d[1], x14
; CHECK-NEXT: mov v1.d[1], x15
; CHECK-NEXT: mov v3.d[1], x13
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; CHECK-LABEL: llrint_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v3.4s, v3.4s
; CHECK-NEXT: frintx v2.4s, v2.4s
; CHECK-NEXT: frintx v1.4s, v1.4s
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: mov s4, v3.s[2]
; CHECK-NEXT: mov s5, v2.s[2]
; CHECK-NEXT: mov s6, v1.s[2]
; CHECK-NEXT: mov s7, v0.s[2]
; CHECK-NEXT: fcvtzs x10, s1
; CHECK-NEXT: fcvtzs x11, s0
; CHECK-NEXT: mov s16, v0.s[1]
; CHECK-NEXT: mov s17, v1.s[1]
; CHECK-NEXT: mov s18, v3.s[1]
; CHECK-NEXT: fcvtzs x14, s3
; CHECK-NEXT: fcvtzs x16, s2
; CHECK-NEXT: fcvtzs x8, s4
; CHECK-NEXT: mov s4, v2.s[1]
; CHECK-NEXT: fcvtzs x9, s5
; CHECK-NEXT: mov s5, v1.s[3]
; CHECK-NEXT: fcvtzs x12, s6
; CHECK-NEXT: mov s6, v0.s[3]
; CHECK-NEXT: fcvtzs x13, s7
; CHECK-NEXT: mov s7, v3.s[3]
; CHECK-NEXT: fmov d0, x11
; CHECK-NEXT: fcvtzs x17, s16
; CHECK-NEXT: fcvtzs x18, s18
; CHECK-NEXT: fcvtzs x15, s4
; CHECK-NEXT: mov s4, v2.s[3]
; CHECK-NEXT: fmov d2, x10
; CHECK-NEXT: fcvtzs x11, s5
; CHECK-NEXT: fcvtzs x10, s6
; CHECK-NEXT: fmov d3, x12
; CHECK-NEXT: fmov d1, x13
; CHECK-NEXT: fcvtzs x12, s17
; CHECK-NEXT: fcvtzs x13, s7
; CHECK-NEXT: fmov d5, x9
; CHECK-NEXT: fmov d6, x14
; CHECK-NEXT: fmov d7, x8
; CHECK-NEXT: fcvtzs x0, s4
; CHECK-NEXT: fmov d4, x16
; CHECK-NEXT: mov v0.d[1], x17
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: mov v3.d[1], x11
; CHECK-NEXT: mov v2.d[1], x12
; CHECK-NEXT: mov v6.d[1], x18
; CHECK-NEXT: mov v7.d[1], x13
; CHECK-NEXT: mov v4.d[1], x15
; CHECK-NEXT: mov v5.d[1], x0
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
; CHECK-LABEL: llrint_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: frintx v0.4s, v0.4s
; CHECK-NEXT: frintx v1.4s, v1.4s
; CHECK-NEXT: frintx v2.4s, v2.4s
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov s16, v0.s[3]
; CHECK-NEXT: mov s17, v0.s[2]
; CHECK-NEXT: mov s18, v0.s[1]
; CHECK-NEXT: fcvtzs x12, s0
; CHECK-NEXT: frintx v0.4s, v3.4s
; CHECK-NEXT: mov s3, v2.s[3]
; CHECK-NEXT: fcvtzs x9, s16
; CHECK-NEXT: mov s16, v1.s[3]
; CHECK-NEXT: fcvtzs x10, s17
; CHECK-NEXT: mov s17, v1.s[2]
; CHECK-NEXT: fcvtzs x11, s18
; CHECK-NEXT: mov s18, v1.s[1]
; CHECK-NEXT: fcvtzs x13, s16
; CHECK-NEXT: stp x10, x9, [sp, #16]
; CHECK-NEXT: mov s16, v2.s[2]
; CHECK-NEXT: fcvtzs x9, s17
; CHECK-NEXT: fcvtzs x10, s18
; CHECK-NEXT: mov s17, v2.s[1]
; CHECK-NEXT: stp x12, x11, [sp]
; CHECK-NEXT: fcvtzs x11, s1
; CHECK-NEXT: frintx v1.4s, v4.4s
; CHECK-NEXT: fcvtzs x12, s3
; CHECK-NEXT: mov s3, v0.s[3]
; CHECK-NEXT: mov s4, v0.s[2]
; CHECK-NEXT: stp x9, x13, [sp, #48]
; CHECK-NEXT: fcvtzs x13, s16
; CHECK-NEXT: fcvtzs x9, s17
; CHECK-NEXT: mov s16, v0.s[1]
; CHECK-NEXT: stp x11, x10, [sp, #32]
; CHECK-NEXT: fcvtzs x10, s2
; CHECK-NEXT: frintx v2.4s, v5.4s
; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: mov s3, v1.s[3]
; CHECK-NEXT: mov s5, v1.s[1]
; CHECK-NEXT: stp x13, x12, [sp, #80]
; CHECK-NEXT: fcvtzs x12, s4
; CHECK-NEXT: mov s4, v1.s[2]
; CHECK-NEXT: fcvtzs x13, s16
; CHECK-NEXT: stp x10, x9, [sp, #64]
; CHECK-NEXT: fcvtzs x9, s0
; CHECK-NEXT: mov s0, v2.s[3]
; CHECK-NEXT: fcvtzs x10, s3
; CHECK-NEXT: frintx v3.4s, v6.4s
; CHECK-NEXT: stp x12, x11, [sp, #112]
; CHECK-NEXT: fcvtzs x11, s4
; CHECK-NEXT: mov s4, v2.s[2]
; CHECK-NEXT: fcvtzs x12, s5
; CHECK-NEXT: mov s5, v2.s[1]
; CHECK-NEXT: stp x9, x13, [sp, #96]
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fcvtzs x13, s0
; CHECK-NEXT: mov s0, v3.s[3]
; CHECK-NEXT: frintx v1.4s, v7.4s
; CHECK-NEXT: stp x11, x10, [sp, #144]
; CHECK-NEXT: fcvtzs x10, s4
; CHECK-NEXT: mov s4, v3.s[2]
; CHECK-NEXT: fcvtzs x11, s5
; CHECK-NEXT: mov s5, v3.s[1]
; CHECK-NEXT: stp x9, x12, [sp, #128]
; CHECK-NEXT: fcvtzs x9, s2
; CHECK-NEXT: fcvtzs x12, s0
; CHECK-NEXT: mov s0, v1.s[3]
; CHECK-NEXT: mov s2, v1.s[2]
; CHECK-NEXT: stp x10, x13, [sp, #176]
; CHECK-NEXT: fcvtzs x10, s4
; CHECK-NEXT: mov s4, v1.s[1]
; CHECK-NEXT: fcvtzs x13, s5
; CHECK-NEXT: stp x9, x11, [sp, #160]
; CHECK-NEXT: fcvtzs x9, s3
; CHECK-NEXT: fcvtzs x11, s0
; CHECK-NEXT: stp x10, x12, [sp, #208]
; CHECK-NEXT: fcvtzs x10, s2
; CHECK-NEXT: fcvtzs x12, s4
; CHECK-NEXT: stp x9, x13, [sp, #192]
; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: stp x10, x11, [sp, #240]
; CHECK-NEXT: add x10, sp, #64
; CHECK-NEXT: stp x9, x12, [sp, #224]
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #32
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #224
; CHECK-NEXT: add x10, sp, #96
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #192
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10]
; CHECK-NEXT: add x10, sp, #160
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #128
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10]
; CHECK-NEXT: mov x10, #28 // =0x1c
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9]
; CHECK-NEXT: mov x9, #24 // =0x18
; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3]
; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #20 // =0x14
; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #16 // =0x10
; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #12 // =0xc
; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #8 // =0x8
; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; CHECK-LABEL: llrint_v1i64_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: fcvtzs x8, d0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
%a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; CHECK-LABEL: llrint_v2i64_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: frintx v0.2d, v0.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: ret
%a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
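; With 256-bit SVE available, the two 128-bit input halves are concatenated
; into a single SVE register with splice and rounded by one predicated frintx;
; extraction back into the NEON return registers is still elementwise.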
define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; CHECK-LABEL: llrint_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: mov z1.d, z0.d[2]
; CHECK-NEXT: mov z2.d, z0.d[3]
; CHECK-NEXT: mov z3.d, z0.d[1]
; CHECK-NEXT: fcvtzs x9, d0
; CHECK-NEXT: fcvtzs x8, d1
; CHECK-NEXT: fcvtzs x10, d2
; CHECK-NEXT: fcvtzs x11, d3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], x11
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
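; movprfx pairs with the destructive predicated frintx so the rounded result
; can be placed in a register other than the source.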
define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; CHECK-LABEL: llrint_v8i64_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: frintx z1.d, p0/m, z2.d
; CHECK-NEXT: mov z4.d, z1.d[2]
; CHECK-NEXT: mov z5.d, z0.d[2]
; CHECK-NEXT: mov z2.d, z0.d[1]
; CHECK-NEXT: mov z3.d, z1.d[3]
; CHECK-NEXT: mov z6.d, z0.d[3]
; CHECK-NEXT: fcvtzs x8, d0
; CHECK-NEXT: mov z0.d, z1.d[1]
; CHECK-NEXT: fcvtzs x10, d1
; CHECK-NEXT: fcvtzs x11, d4
; CHECK-NEXT: fcvtzs x12, d5
; CHECK-NEXT: fcvtzs x9, d2
; CHECK-NEXT: fcvtzs x13, d3
; CHECK-NEXT: fcvtzs x14, d6
; CHECK-NEXT: fcvtzs x15, d0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d2, x10
; CHECK-NEXT: fmov d1, x12
; CHECK-NEXT: fmov d3, x11
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: mov v2.d[1], x15
; CHECK-NEXT: mov v1.d[1], x14
; CHECK-NEXT: mov v3.d[1], x13
; CHECK-NEXT: ret
%a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
define <16 x i64> @llrint_v16i64_v16f64(<16 x double> %x) {
; CHECK-LABEL: llrint_v16i64_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d, vl2
; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d
; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d
; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d
; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
; CHECK-NEXT: movprfx z3, z6
; CHECK-NEXT: frintx z3.d, p0/m, z6.d
; CHECK-NEXT: movprfx z1, z4
; CHECK-NEXT: frintx z1.d, p0/m, z4.d
; CHECK-NEXT: frintx z2.d, p0/m, z2.d
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: mov z4.d, z3.d[2]
; CHECK-NEXT: mov z5.d, z1.d[2]
; CHECK-NEXT: mov z6.d, z2.d[3]
; CHECK-NEXT: fcvtzs x11, d0
; CHECK-NEXT: fcvtzs x12, d1
; CHECK-NEXT: fcvtzs x13, d2
; CHECK-NEXT: fcvtzs x14, d3
; CHECK-NEXT: mov z7.d, z3.d[3]
; CHECK-NEXT: mov z16.d, z1.d[3]
; CHECK-NEXT: fcvtzs x9, d4
; CHECK-NEXT: fcvtzs x10, d5
; CHECK-NEXT: mov z4.d, z2.d[2]
; CHECK-NEXT: mov z5.d, z0.d[2]
; CHECK-NEXT: fcvtzs x8, d6
; CHECK-NEXT: mov z2.d, z2.d[1]
; CHECK-NEXT: mov z6.d, z0.d[3]
; CHECK-NEXT: mov z1.d, z1.d[1]
; CHECK-NEXT: mov z3.d, z3.d[1]
; CHECK-NEXT: fcvtzs x15, d4
; CHECK-NEXT: mov z4.d, z0.d[1]
; CHECK-NEXT: fmov d0, x11
; CHECK-NEXT: fcvtzs x16, d5
; CHECK-NEXT: fcvtzs x11, d2
; CHECK-NEXT: fmov d2, x13
; CHECK-NEXT: fcvtzs x17, d7
; CHECK-NEXT: fcvtzs x18, d16
; CHECK-NEXT: fcvtzs x0, d3
; CHECK-NEXT: fcvtzs x13, d4
; CHECK-NEXT: fmov d4, x12
; CHECK-NEXT: fcvtzs x12, d6
; CHECK-NEXT: fmov d6, x14
; CHECK-NEXT: fcvtzs x14, d1
; CHECK-NEXT: fmov d3, x15
; CHECK-NEXT: fmov d1, x16
; CHECK-NEXT: fmov d5, x10
; CHECK-NEXT: fmov d7, x9
; CHECK-NEXT: mov v2.d[1], x11
; CHECK-NEXT: mov v0.d[1], x13
; CHECK-NEXT: mov v3.d[1], x8
; CHECK-NEXT: mov v6.d[1], x0
; CHECK-NEXT: mov v4.d[1], x14
; CHECK-NEXT: mov v1.d[1], x12
; CHECK-NEXT: mov v5.d[1], x18
; CHECK-NEXT: mov v7.d[1], x17
; CHECK-NEXT: ret
%a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
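; Only the first 16 doubles arrive in q0-q7; the remaining 16 are loaded from
; the caller's stack through the frame pointer (ldp from x29 offsets), and the
; <32 x i64> result is again returned indirectly through x8.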
define <32 x i64> @llrint_v32i64_v32f64(<32 x double> %x) {
; CHECK-LABEL: llrint_v32i64_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #272
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p1.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d
; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d
; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d
; CHECK-NEXT: ldp q5, q19, [x29, #16]
; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: frintx z3.d, p0/m, z0.d
; CHECK-NEXT: movprfx z16, z2
; CHECK-NEXT: frintx z16.d, p0/m, z2.d
; CHECK-NEXT: frintx z4.d, p0/m, z4.d
; CHECK-NEXT: splice z5.d, p1, z5.d, z19.d
; CHECK-NEXT: frintx z6.d, p0/m, z6.d
; CHECK-NEXT: ldp q2, q17, [x29, #48]
; CHECK-NEXT: ldp q0, q1, [x29, #112]
; CHECK-NEXT: mov z18.d, z3.d[3]
; CHECK-NEXT: mov z7.d, z3.d[2]
; CHECK-NEXT: fcvtzs x9, d3
; CHECK-NEXT: mov z3.d, z3.d[1]
; CHECK-NEXT: mov z20.d, z16.d[3]
; CHECK-NEXT: fcvtzs x12, d16
; CHECK-NEXT: splice z2.d, p1, z2.d, z17.d
; CHECK-NEXT: frintx z5.d, p0/m, z5.d
; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
; CHECK-NEXT: fcvtzs x10, d18
; CHECK-NEXT: fcvtzs x11, d7
; CHECK-NEXT: mov z18.d, z16.d[2]
; CHECK-NEXT: mov z7.d, z16.d[1]
; CHECK-NEXT: fcvtzs x13, d3
; CHECK-NEXT: fcvtzs x14, d20
; CHECK-NEXT: str x9, [sp, #128]
; CHECK-NEXT: mov z16.d, z4.d[3]
; CHECK-NEXT: fcvtzs x9, d18
; CHECK-NEXT: mov z18.d, z4.d[2]
; CHECK-NEXT: frintx z2.d, p0/m, z2.d
; CHECK-NEXT: stp x11, x10, [sp, #144]
; CHECK-NEXT: fcvtzs x10, d7
; CHECK-NEXT: mov z7.d, z4.d[1]
; CHECK-NEXT: str x13, [sp, #136]
; CHECK-NEXT: fcvtzs x11, d16
; CHECK-NEXT: mov z16.d, z6.d[3]
; CHECK-NEXT: fcvtzs x13, d18
; CHECK-NEXT: ldp q3, q19, [x29, #80]
; CHECK-NEXT: stp x9, x14, [sp, #176]
; CHECK-NEXT: fcvtzs x9, d4
; CHECK-NEXT: mov z4.d, z6.d[2]
; CHECK-NEXT: stp x12, x10, [sp, #160]
; CHECK-NEXT: fcvtzs x10, d7
; CHECK-NEXT: mov z7.d, z6.d[1]
; CHECK-NEXT: fcvtzs x12, d6
; CHECK-NEXT: splice z3.d, p1, z3.d, z19.d
; CHECK-NEXT: mov z6.d, z5.d[2]
; CHECK-NEXT: stp x13, x11, [sp, #208]
; CHECK-NEXT: fcvtzs x11, d16
; CHECK-NEXT: fcvtzs x13, d4
; CHECK-NEXT: mov z4.d, z5.d[3]
; CHECK-NEXT: mov z1.d, z5.d[1]
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: stp x9, x10, [sp, #192]
; CHECK-NEXT: fcvtzs x9, d7
; CHECK-NEXT: frintx z3.d, p0/m, z3.d
; CHECK-NEXT: fcvtzs x10, d4
; CHECK-NEXT: stp x13, x11, [sp, #240]
; CHECK-NEXT: fcvtzs x11, d6
; CHECK-NEXT: mov z4.d, z2.d[3]
; CHECK-NEXT: fcvtzs x13, d2
; CHECK-NEXT: stp x12, x9, [sp, #224]
; CHECK-NEXT: fcvtzs x9, d5
; CHECK-NEXT: fcvtzs x12, d1
; CHECK-NEXT: mov z5.d, z2.d[2]
; CHECK-NEXT: mov z1.d, z2.d[1]
; CHECK-NEXT: mov z2.d, z3.d[2]
; CHECK-NEXT: stp x11, x10, [sp, #16]
; CHECK-NEXT: fcvtzs x10, d4
; CHECK-NEXT: mov z4.d, z3.d[3]
; CHECK-NEXT: fcvtzs x11, d5
; CHECK-NEXT: stp x9, x12, [sp]
; CHECK-NEXT: fcvtzs x9, d1
; CHECK-NEXT: mov z1.d, z3.d[1]
; CHECK-NEXT: fcvtzs x12, d4
; CHECK-NEXT: stp x11, x10, [sp, #48]
; CHECK-NEXT: fcvtzs x10, d2
; CHECK-NEXT: fcvtzs x11, d3
; CHECK-NEXT: stp x13, x9, [sp, #32]
; CHECK-NEXT: fcvtzs x9, d1
; CHECK-NEXT: mov z2.d, z0.d[3]
; CHECK-NEXT: mov z3.d, z0.d[2]
; CHECK-NEXT: mov z1.d, z0.d[1]
; CHECK-NEXT: fcvtzs x13, d2
; CHECK-NEXT: stp x10, x12, [sp, #80]
; CHECK-NEXT: fcvtzs x12, d0
; CHECK-NEXT: fcvtzs x10, d3
; CHECK-NEXT: stp x11, x9, [sp, #64]
; CHECK-NEXT: fcvtzs x9, d1
; CHECK-NEXT: stp x10, x13, [sp, #112]
; CHECK-NEXT: add x10, sp, #192
; CHECK-NEXT: stp x12, x9, [sp, #96]
; CHECK-NEXT: add x9, sp, #128
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #160
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #96
; CHECK-NEXT: add x10, sp, #224
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9]
; CHECK-NEXT: add x9, sp, #64
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10]
; CHECK-NEXT: add x10, sp, #32
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9]
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10]
; CHECK-NEXT: mov x10, #28 // =0x1c
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9]
; CHECK-NEXT: mov x9, #24 // =0x18
; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3]
; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #20 // =0x14
; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #16 // =0x10
; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #12 // =0xc
; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #8 // =0x8
; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: mov x9, #4 // =0x4
; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x)
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)