; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Signed i8 remainder. Both selectors are expected to sign-extend the operands
; with sxtb, divide with sdiv, and recover the remainder with msub
; (a - (a / b) * b). The SelectionDAG output feeds the extended values to msub,
; while the GlobalISel output reuses the original w0/w1.
define i8 @si8(i8 %a, i8 %b) {
; CHECK-SD-LABEL: si8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sxtb w8, w1
; CHECK-SD-NEXT: sxtb w9, w0
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: si8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: sxtb w9, w1
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w1, w0
; CHECK-GI-NEXT: ret
entry:
%s = srem i8 %a, %b
ret i8 %s
}
; Unsigned i8 remainder. Operands are zero-extended by masking with #0xff,
; divided with udiv, and the remainder is recovered with msub
; (a - (a / b) * b). The SelectionDAG output feeds the masked values to msub,
; while the GlobalISel output reuses the original w0/w1.
define i8 @ui8(i8 %a, i8 %b) {
; CHECK-SD-LABEL: ui8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w1, #0xff
; CHECK-SD-NEXT: and w9, w0, #0xff
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ui8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: and w9, w1, #0xff
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w1, w0
; CHECK-GI-NEXT: ret
entry:
%s = urem i8 %a, %b
ret i8 %s
}
; Signed i16 remainder. Same shape as the i8 case, but the operands are
; sign-extended with sxth before the sdiv + msub pair. The GlobalISel output
; again uses the original w0/w1 as the msub multiplicand and addend.
define i16 @si16(i16 %a, i16 %b) {
; CHECK-SD-LABEL: si16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sxth w8, w1
; CHECK-SD-NEXT: sxth w9, w0
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: si16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxth w8, w0
; CHECK-GI-NEXT: sxth w9, w1
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w1, w0
; CHECK-GI-NEXT: ret
entry:
%s = srem i16 %a, %b
ret i16 %s
}
; Unsigned i16 remainder. Operands are zero-extended by masking with #0xffff,
; then divided with udiv and reduced with msub. The GlobalISel output uses the
; original w0/w1 as the msub multiplicand and addend.
define i16 @ui16(i16 %a, i16 %b) {
; CHECK-SD-LABEL: ui16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w1, #0xffff
; CHECK-SD-NEXT: and w9, w0, #0xffff
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ui16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
; CHECK-GI-NEXT: and w9, w1, #0xffff
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: msub w0, w8, w1, w0
; CHECK-GI-NEXT: ret
entry:
%s = urem i16 %a, %b
ret i16 %s
}
; Signed i32 remainder. No extension is needed; both selectors share one
; expected sequence: sdiv for the quotient, then msub for a - q * b.
define i32 @si32(i32 %a, i32 %b) {
; CHECK-LABEL: si32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sdiv w8, w0, w1
; CHECK-NEXT: msub w0, w8, w1, w0
; CHECK-NEXT: ret
entry:
%s = srem i32 %a, %b
ret i32 %s
}
; Unsigned i32 remainder. Both selectors share one expected sequence:
; udiv for the quotient, then msub for a - q * b.
define i32 @ui32(i32 %a, i32 %b) {
; CHECK-LABEL: ui32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: udiv w8, w0, w1
; CHECK-NEXT: msub w0, w8, w1, w0
; CHECK-NEXT: ret
entry:
%s = urem i32 %a, %b
ret i32 %s
}
; Signed i64 remainder. Same sdiv + msub pair as the i32 case, using the
; 64-bit x registers; the expected output is shared by both selectors.
define i64 @si64(i64 %a, i64 %b) {
; CHECK-LABEL: si64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sdiv x8, x0, x1
; CHECK-NEXT: msub x0, x8, x1, x0
; CHECK-NEXT: ret
entry:
%s = srem i64 %a, %b
ret i64 %s
}
; Unsigned i64 remainder. Same udiv + msub pair as the i32 case, using the
; 64-bit x registers; the expected output is shared by both selectors.
define i64 @ui64(i64 %a, i64 %b) {
; CHECK-LABEL: ui64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: udiv x8, x0, x1
; CHECK-NEXT: msub x0, x8, x1, x0
; CHECK-NEXT: ret
entry:
%s = urem i64 %a, %b
ret i64 %s
}
; Signed i128 remainder. Instead of inline divide instructions, the expected
; output is a call to __modti3, with the link register x30 saved and restored
; around the call. Both selectors share the same expected output.
define i128 @si128(i128 %a, i128 %b) {
; CHECK-LABEL: si128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __modti3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%s = srem i128 %a, %b
ret i128 %s
}
; Unsigned i128 remainder. The expected output is a call to __umodti3, with
; the link register x30 saved and restored around the call. Both selectors
; share the same expected output.
define i128 @ui128(i128 %a, i128 %b) {
; CHECK-LABEL: ui128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __umodti3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
%s = urem i128 %a, %b
ret i128 %s
}
; Signed remainder on <2 x i8>. Both selectors sign-extend each element inside
; its 32-bit lane with shl/sshr by 24 and divide lane by lane in general
; registers with sdiv. The SelectionDAG output also finishes per lane with
; msub and reinserts each result; the GlobalISel output gathers the quotients
; into v2 and finishes with a single vector mls.
define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: sv2i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-GI-NEXT: shl v1.2s, v1.2s, #24
; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i8> %d, %e
ret <2 x i8> %s
}
; Signed remainder on <3 x i8>. In the expected output the six elements arrive
; in w0-w5 (dividend in w0-w2, divisor in w3-w5) and the results leave in
; w0-w2. Both selectors emit three independent sxtb / sdiv / msub chains and
; differ only in the order the operands are extended.
define <3 x i8> @sv3i8(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-LABEL: sv3i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sxtb w8, w3
; CHECK-SD-NEXT: sxtb w9, w0
; CHECK-SD-NEXT: sxtb w11, w4
; CHECK-SD-NEXT: sxtb w12, w1
; CHECK-SD-NEXT: sxtb w14, w5
; CHECK-SD-NEXT: sxtb w15, w2
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w1, w13, w11, w12
; CHECK-SD-NEXT: msub w2, w16, w14, w15
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: sxtb w9, w3
; CHECK-GI-NEXT: sxtb w11, w1
; CHECK-GI-NEXT: sxtb w12, w4
; CHECK-GI-NEXT: sxtb w14, w2
; CHECK-GI-NEXT: sxtb w15, w5
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: sdiv w13, w11, w12
; CHECK-GI-NEXT: msub w0, w10, w9, w8
; CHECK-GI-NEXT: sdiv w16, w14, w15
; CHECK-GI-NEXT: msub w1, w13, w12, w11
; CHECK-GI-NEXT: msub w2, w16, w15, w14
; CHECK-GI-NEXT: ret
entry:
%s = srem <3 x i8> %d, %e
ret <3 x i8> %s
}
; Signed remainder on <4 x i8>. The SelectionDAG output sign-extends within
; the 16-bit lanes (shl/sshr by 8), extracts each lane with smov, runs four
; scalar sdiv / msub chains and inserts the results back into the h lanes.
; The GlobalISel output widens to 32-bit lanes first (ushll, then shl/sshr by
; 24), performs four scalar sdiv operations, rebuilds the quotient vector in
; v2, finishes with one vector mls and narrows the result with xtn.
define <4 x i8> @sv4i8(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #8
; CHECK-SD-NEXT: smov w11, v1.h[0]
; CHECK-SD-NEXT: smov w12, v0.h[0]
; CHECK-SD-NEXT: smov w8, v1.h[1]
; CHECK-SD-NEXT: smov w9, v0.h[1]
; CHECK-SD-NEXT: smov w14, v1.h[2]
; CHECK-SD-NEXT: smov w15, v0.h[2]
; CHECK-SD-NEXT: smov w17, v1.h[3]
; CHECK-SD-NEXT: smov w18, v0.h[3]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: sdiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #24
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #24
; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #24
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #24
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: sdiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
entry:
%s = srem <4 x i8> %d, %e
ret <4 x i8> %s
}
; Signed remainder on <8 x i8>. The SelectionDAG output extracts every byte
; lane with smov (which sign-extends), runs eight scalar sdiv / msub chains,
; assembles the result byte by byte in v2 and copies it to d0. The GlobalISel
; output widens with sshll to two groups of four 32-bit lanes, performs eight
; scalar sdiv operations, applies one vector mls per group and narrows the
; result with uzp1 + xtn.
define <8 x i8> @sv8i8(<8 x i8> %d, <8 x i8> %e) {
; CHECK-SD-LABEL: sv8i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: smov w11, v1.b[0]
; CHECK-SD-NEXT: smov w12, v0.b[0]
; CHECK-SD-NEXT: smov w8, v1.b[1]
; CHECK-SD-NEXT: smov w9, v0.b[1]
; CHECK-SD-NEXT: smov w14, v1.b[2]
; CHECK-SD-NEXT: smov w15, v0.b[2]
; CHECK-SD-NEXT: smov w17, v1.b[3]
; CHECK-SD-NEXT: smov w18, v0.b[3]
; CHECK-SD-NEXT: smov w1, v1.b[4]
; CHECK-SD-NEXT: smov w2, v0.b[4]
; CHECK-SD-NEXT: smov w4, v1.b[5]
; CHECK-SD-NEXT: smov w5, v0.b[5]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: smov w13, v1.b[7]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: smov w11, v0.b[6]
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: smov w10, v1.b[6]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: sdiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: smov w14, v0.b[7]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: sdiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: sdiv w9, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: sdiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: sdiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w10, v3.s[1]
; CHECK-GI-NEXT: mov w11, v3.s[2]
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s1
; CHECK-GI-NEXT: mov w14, v1.s[1]
; CHECK-GI-NEXT: mov w15, v1.s[2]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v2.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v2.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v0.s[3]
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v2.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: sdiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: sdiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v0.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s
; CHECK-GI-NEXT: sdiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v0.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: sdiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: ret
entry:
%s = srem <8 x i8> %d, %e
ret <8 x i8> %s
}
; Signed remainder on <16 x i8>. The SelectionDAG output is fully scalarised:
; sixteen smov / sdiv / msub chains whose results are inserted into v2 one
; byte at a time. It needs enough general registers that x19-x28 are saved in
; an 80-byte frame and reloaded as they stop being used. The GlobalISel
; output widens to four groups of four 32-bit lanes with sshll/sshll2, does
; sixteen scalar sdiv operations, applies one vector mls per group and narrows
; with uzp1; its expected output contains no stack spills.
define <16 x i8> @sv16i8(<16 x i8> %d, <16 x i8> %e) {
; CHECK-SD-LABEL: sv16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: smov w11, v1.b[0]
; CHECK-SD-NEXT: smov w12, v0.b[0]
; CHECK-SD-NEXT: smov w8, v1.b[1]
; CHECK-SD-NEXT: smov w9, v0.b[1]
; CHECK-SD-NEXT: smov w14, v1.b[2]
; CHECK-SD-NEXT: smov w15, v0.b[2]
; CHECK-SD-NEXT: smov w17, v1.b[3]
; CHECK-SD-NEXT: smov w18, v0.b[3]
; CHECK-SD-NEXT: smov w1, v1.b[4]
; CHECK-SD-NEXT: smov w2, v0.b[4]
; CHECK-SD-NEXT: smov w4, v1.b[5]
; CHECK-SD-NEXT: smov w5, v0.b[5]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: smov w7, v1.b[6]
; CHECK-SD-NEXT: smov w19, v0.b[6]
; CHECK-SD-NEXT: smov w21, v1.b[7]
; CHECK-SD-NEXT: smov w22, v0.b[7]
; CHECK-SD-NEXT: smov w24, v1.b[8]
; CHECK-SD-NEXT: smov w25, v0.b[8]
; CHECK-SD-NEXT: smov w27, v1.b[9]
; CHECK-SD-NEXT: smov w28, v0.b[9]
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: smov w13, v1.b[11]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: smov w11, v0.b[10]
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: smov w10, v1.b[10]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: sdiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: smov w14, v0.b[11]
; CHECK-SD-NEXT: smov w16, v1.b[12]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: sdiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: smov w17, v0.b[12]
; CHECK-SD-NEXT: smov w0, v1.b[13]
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: sdiv w6, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: smov w1, v0.b[13]
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: sdiv w20, w19, w7
; CHECK-SD-NEXT: msub w8, w6, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: sdiv w23, w22, w21
; CHECK-SD-NEXT: msub w8, w20, w7, w19
; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: sdiv w26, w25, w24
; CHECK-SD-NEXT: msub w8, w23, w21, w22
; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: sdiv w9, w28, w27
; CHECK-SD-NEXT: msub w8, w26, w24, w25
; CHECK-SD-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[8], w8
; CHECK-SD-NEXT: sdiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w27, w28
; CHECK-SD-NEXT: mov v2.b[9], w8
; CHECK-SD-NEXT: sdiv w15, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: smov w10, v1.b[14]
; CHECK-SD-NEXT: smov w11, v0.b[14]
; CHECK-SD-NEXT: mov v2.b[10], w8
; CHECK-SD-NEXT: sdiv w18, w17, w16
; CHECK-SD-NEXT: msub w8, w15, w13, w14
; CHECK-SD-NEXT: smov w13, v1.b[15]
; CHECK-SD-NEXT: smov w14, v0.b[15]
; CHECK-SD-NEXT: mov v2.b[11], w8
; CHECK-SD-NEXT: sdiv w9, w1, w0
; CHECK-SD-NEXT: msub w8, w18, w16, w17
; CHECK-SD-NEXT: mov v2.b[12], w8
; CHECK-SD-NEXT: sdiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w0, w1
; CHECK-SD-NEXT: mov v2.b[13], w8
; CHECK-SD-NEXT: sdiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[14], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[15], w8
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v4.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v5.8h, v1.8b, #0
; CHECK-GI-NEXT: sshll2 v6.8h, v0.16b, #0
; CHECK-GI-NEXT: sshll2 v7.8h, v1.16b, #0
; CHECK-GI-NEXT: sshll v2.4s, v4.4h, #0
; CHECK-GI-NEXT: sshll v3.4s, v5.4h, #0
; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0
; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0
; CHECK-GI-NEXT: sshll v0.4s, v6.4h, #0
; CHECK-GI-NEXT: sshll v1.4s, v7.4h, #0
; CHECK-GI-NEXT: sshll2 v6.4s, v6.8h, #0
; CHECK-GI-NEXT: sshll2 v7.4s, v7.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: fmov w17, s1
; CHECK-GI-NEXT: mov w18, v1.s[1]
; CHECK-GI-NEXT: mov w0, v1.s[2]
; CHECK-GI-NEXT: mov w1, v1.s[3]
; CHECK-GI-NEXT: sdiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[1]
; CHECK-GI-NEXT: mov w9, v3.s[1]
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w3, v7.s[1]
; CHECK-GI-NEXT: mov w4, v7.s[2]
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[2]
; CHECK-GI-NEXT: mov w9, v3.s[2]
; CHECK-GI-NEXT: mov v16.s[0], w11
; CHECK-GI-NEXT: mov w11, v6.s[3]
; CHECK-GI-NEXT: sdiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[3]
; CHECK-GI-NEXT: mov v16.s[1], w10
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v16.s[2], w9
; CHECK-GI-NEXT: sdiv w14, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov w13, v5.s[1]
; CHECK-GI-NEXT: mov v16.s[3], w8
; CHECK-GI-NEXT: mls v2.4s, v16.4s, v3.4s
; CHECK-GI-NEXT: sdiv w15, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov w13, v5.s[2]
; CHECK-GI-NEXT: mov v17.s[0], w14
; CHECK-GI-NEXT: mov w14, v7.s[3]
; CHECK-GI-NEXT: sdiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: mov v17.s[1], w15
; CHECK-GI-NEXT: sdiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s0
; CHECK-GI-NEXT: mov v17.s[2], w13
; CHECK-GI-NEXT: sdiv w16, w16, w17
; CHECK-GI-NEXT: mov w17, v0.s[1]
; CHECK-GI-NEXT: mov v17.s[3], w12
; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s
; CHECK-GI-NEXT: sdiv w17, w17, w18
; CHECK-GI-NEXT: mov w18, v0.s[2]
; CHECK-GI-NEXT: mov v18.s[0], w16
; CHECK-GI-NEXT: sdiv w18, w18, w0
; CHECK-GI-NEXT: mov w0, v0.s[3]
; CHECK-GI-NEXT: mov v18.s[1], w17
; CHECK-GI-NEXT: sdiv w0, w0, w1
; CHECK-GI-NEXT: fmov w1, s6
; CHECK-GI-NEXT: mov v18.s[2], w18
; CHECK-GI-NEXT: sdiv w1, w1, w2
; CHECK-GI-NEXT: mov w2, v6.s[1]
; CHECK-GI-NEXT: mov v18.s[3], w0
; CHECK-GI-NEXT: mls v0.4s, v18.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v4.8h
; CHECK-GI-NEXT: sdiv w2, w2, w3
; CHECK-GI-NEXT: mov w3, v6.s[2]
; CHECK-GI-NEXT: mov v19.s[0], w1
; CHECK-GI-NEXT: sdiv w3, w3, w4
; CHECK-GI-NEXT: mov v19.s[1], w2
; CHECK-GI-NEXT: sdiv w10, w11, w14
; CHECK-GI-NEXT: mov v19.s[2], w3
; CHECK-GI-NEXT: mov v19.s[3], w10
; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v6.8h
; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
entry:
%s = srem <16 x i8> %d, %e
ret <16 x i8> %s
}
define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) {
; CHECK-SD-LABEL: sv32i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #304
; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 304
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: smov w8, v2.b[1]
; CHECK-SD-NEXT: smov w9, v0.b[1]
; CHECK-SD-NEXT: smov w19, v3.b[7]
; CHECK-SD-NEXT: smov w7, v1.b[7]
; CHECK-SD-NEXT: smov w6, v3.b[8]
; CHECK-SD-NEXT: smov w3, v1.b[8]
; CHECK-SD-NEXT: smov w13, v3.b[0]
; CHECK-SD-NEXT: smov w5, v3.b[1]
; CHECK-SD-NEXT: smov w0, v1.b[1]
; CHECK-SD-NEXT: smov w12, v3.b[2]
; CHECK-SD-NEXT: smov w17, v3.b[3]
; CHECK-SD-NEXT: smov w16, v1.b[3]
; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[0]
; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill
; CHECK-SD-NEXT: smov w9, v0.b[0]
; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload
; CHECK-SD-NEXT: smov w15, v3.b[4]
; CHECK-SD-NEXT: smov w14, v1.b[4]
; CHECK-SD-NEXT: smov w4, v3.b[5]
; CHECK-SD-NEXT: smov w1, v1.b[5]
; CHECK-SD-NEXT: smov w2, v3.b[6]
; CHECK-SD-NEXT: smov w18, v1.b[6]
; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill
; CHECK-SD-NEXT: smov w21, v3.b[9]
; CHECK-SD-NEXT: smov w20, v1.b[9]
; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill
; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload
; CHECK-SD-NEXT: sdiv w11, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[2]
; CHECK-SD-NEXT: smov w9, v0.b[2]
; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[3]
; CHECK-SD-NEXT: smov w9, v0.b[3]
; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[4]
; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w9, v0.b[4]
; CHECK-SD-NEXT: sdiv w27, w0, w5
; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[5]
; CHECK-SD-NEXT: smov w9, v0.b[5]
; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[6]
; CHECK-SD-NEXT: smov w9, v0.b[6]
; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[7]
; CHECK-SD-NEXT: smov w9, v0.b[7]
; CHECK-SD-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; CHECK-SD-NEXT: sdiv w11, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[8]
; CHECK-SD-NEXT: smov w9, v0.b[8]
; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[9]
; CHECK-SD-NEXT: smov w9, v0.b[9]
; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[10]
; CHECK-SD-NEXT: smov w9, v0.b[10]
; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[11]
; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w9, v0.b[11]
; CHECK-SD-NEXT: sdiv w25, w16, w17
; CHECK-SD-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill
; CHECK-SD-NEXT: sdiv w11, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[12]
; CHECK-SD-NEXT: smov w9, v0.b[12]
; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[13]
; CHECK-SD-NEXT: smov w9, v0.b[13]
; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[14]
; CHECK-SD-NEXT: smov w9, v0.b[14]
; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w11, v1.b[2]
; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: smov w8, v2.b[15]
; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w9, v0.b[15]
; CHECK-SD-NEXT: sdiv w22, w11, w12
; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Folded Spill
; CHECK-SD-NEXT: smov w10, v1.b[0]
; CHECK-SD-NEXT: sdiv w9, w7, w19
; CHECK-SD-NEXT: sdiv w8, w3, w6
; CHECK-SD-NEXT: sdiv w23, w10, w13
; CHECK-SD-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w9, w8, w30, w9
; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w8, w8, w29, w30
; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w10, w23, w13, w10
; CHECK-SD-NEXT: sdiv w24, w14, w15
; CHECK-SD-NEXT: msub w13, w27, w5, w0
; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[1], w9
; CHECK-SD-NEXT: msub w9, w22, w12, w11
; CHECK-SD-NEXT: smov w11, v1.b[10]
; CHECK-SD-NEXT: fmov s2, w10
; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[1], w13
; CHECK-SD-NEXT: msub w8, w8, w5, w10
; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload
; CHECK-SD-NEXT: smov w10, v3.b[10]
; CHECK-SD-NEXT: sdiv w28, w1, w4
; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[2], w9
; CHECK-SD-NEXT: mov v0.b[2], w8
; CHECK-SD-NEXT: msub w8, w25, w17, w16
; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w12, w12, w5, w13
; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: msub w8, w24, w15, w14
; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[3], w12
; CHECK-SD-NEXT: msub w13, w13, w17, w16
; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload
; CHECK-SD-NEXT: sdiv w26, w18, w2
; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload
; CHECK-SD-NEXT: smov w12, v3.b[11]
; CHECK-SD-NEXT: msub w15, w15, w17, w16
; CHECK-SD-NEXT: smov w14, v1.b[11]
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: msub w8, w28, w4, w1
; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[4], w13
; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[5], w15
; CHECK-SD-NEXT: msub w16, w16, w1, w17
; CHECK-SD-NEXT: smov w15, v3.b[12]
; CHECK-SD-NEXT: msub w8, w26, w2, w18
; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload
; CHECK-SD-NEXT: sdiv w0, w20, w21
; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload
; CHECK-SD-NEXT: smov w17, v1.b[12]
; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[6], w16
; CHECK-SD-NEXT: msub w18, w18, w2, w1
; CHECK-SD-NEXT: msub w8, w8, w19, w7
; CHECK-SD-NEXT: ldp w2, w1, [sp, #104] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[7], w18
; CHECK-SD-NEXT: smov w18, v3.b[13]
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; CHECK-SD-NEXT: sdiv w9, w11, w10
; CHECK-SD-NEXT: msub w1, w1, w4, w2
; CHECK-SD-NEXT: smov w2, v1.b[13]
; CHECK-SD-NEXT: msub w8, w8, w6, w3
; CHECK-SD-NEXT: ldp w4, w3, [sp, #140] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[8], w1
; CHECK-SD-NEXT: mov v2.b[8], w8
; CHECK-SD-NEXT: msub w8, w0, w21, w20
; CHECK-SD-NEXT: msub w3, w3, w5, w4
; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload
; CHECK-SD-NEXT: sdiv w13, w14, w12
; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[9], w8
; CHECK-SD-NEXT: mov v0.b[9], w3
; CHECK-SD-NEXT: msub w8, w9, w10, w11
; CHECK-SD-NEXT: msub w1, w1, w5, w4
; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload
; CHECK-SD-NEXT: smov w9, v3.b[14]
; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload
; CHECK-SD-NEXT: smov w10, v1.b[14]
; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[10], w8
; CHECK-SD-NEXT: mov v0.b[10], w1
; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w11, w11, w4, w3
; CHECK-SD-NEXT: sdiv w16, w17, w15
; CHECK-SD-NEXT: msub w8, w13, w12, w14
; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[11], w11
; CHECK-SD-NEXT: smov w11, v3.b[15]
; CHECK-SD-NEXT: msub w13, w13, w1, w14
; CHECK-SD-NEXT: smov w14, v1.b[15]
; CHECK-SD-NEXT: mov v2.b[11], w8
; CHECK-SD-NEXT: mov v0.b[12], w13
; CHECK-SD-NEXT: sdiv w0, w2, w18
; CHECK-SD-NEXT: msub w8, w16, w15, w17
; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[12], w8
; CHECK-SD-NEXT: msub w15, w15, w17, w16
; CHECK-SD-NEXT: ldp w17, w16, [sp, #188] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[13], w15
; CHECK-SD-NEXT: sdiv w12, w10, w9
; CHECK-SD-NEXT: msub w8, w0, w18, w2
; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w16, w16, w18, w17
; CHECK-SD-NEXT: mov v2.b[13], w8
; CHECK-SD-NEXT: mov v0.b[14], w16
; CHECK-SD-NEXT: sdiv w13, w14, w11
; CHECK-SD-NEXT: msub w8, w12, w9, w10
; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[14], w8
; CHECK-SD-NEXT: msub w9, w9, w12, w10
; CHECK-SD-NEXT: mov v0.b[15], w9
; CHECK-SD-NEXT: msub w8, w13, w11, w14
; CHECK-SD-NEXT: mov v2.b[15], w8
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: add sp, sp, #304
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv32i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -88
; CHECK-GI-NEXT: .cfi_offset w29, -96
; CHECK-GI-NEXT: sshll v4.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v5.8h, v2.8b, #0
; CHECK-GI-NEXT: sshll v16.8h, v1.8b, #0
; CHECK-GI-NEXT: sshll v17.8h, v3.8b, #0
; CHECK-GI-NEXT: sshll v6.4s, v4.4h, #0
; CHECK-GI-NEXT: sshll v7.4s, v5.4h, #0
; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0
; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0
; CHECK-GI-NEXT: sshll v18.4s, v16.4h, #0
; CHECK-GI-NEXT: sshll v19.4s, v17.4h, #0
; CHECK-GI-NEXT: sshll2 v16.4s, v16.8h, #0
; CHECK-GI-NEXT: sshll2 v17.4s, v17.8h, #0
; CHECK-GI-NEXT: fmov w8, s6
; CHECK-GI-NEXT: fmov w9, s7
; CHECK-GI-NEXT: mov w12, v7.s[3]
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: fmov w6, s19
; CHECK-GI-NEXT: mov w7, v19.s[3]
; CHECK-GI-NEXT: fmov w21, s17
; CHECK-GI-NEXT: mov w23, v17.s[3]
; CHECK-GI-NEXT: sdiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[1]
; CHECK-GI-NEXT: mov w9, v7.s[1]
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[2]
; CHECK-GI-NEXT: mov w9, v7.s[2]
; CHECK-GI-NEXT: mov v20.s[0], w11
; CHECK-GI-NEXT: sdiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[3]
; CHECK-GI-NEXT: sshll2 v6.8h, v0.16b, #0
; CHECK-GI-NEXT: mov v20.s[1], w10
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v28.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v20.s[2], w9
; CHECK-GI-NEXT: sdiv w15, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov w13, v5.s[1]
; CHECK-GI-NEXT: mov v20.s[3], w8
; CHECK-GI-NEXT: sdiv w14, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov w13, v5.s[2]
; CHECK-GI-NEXT: sshll v5.4s, v6.4h, #0
; CHECK-GI-NEXT: mov v21.s[0], w15
; CHECK-GI-NEXT: sdiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: sshll2 v4.8h, v2.16b, #0
; CHECK-GI-NEXT: mov v21.s[1], w14
; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0
; CHECK-GI-NEXT: sshll v7.4s, v4.4h, #0
; CHECK-GI-NEXT: sshll v30.4s, v2.4h, #0
; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0
; CHECK-GI-NEXT: fmov w17, s7
; CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s
; CHECK-GI-NEXT: sdiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s5
; CHECK-GI-NEXT: mov v21.s[2], w13
; CHECK-GI-NEXT: sdiv w1, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[1]
; CHECK-GI-NEXT: mov w17, v7.s[1]
; CHECK-GI-NEXT: mov v21.s[3], w12
; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s
; CHECK-GI-NEXT: sdiv w0, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[2]
; CHECK-GI-NEXT: mov w17, v7.s[2]
; CHECK-GI-NEXT: mov v22.s[0], w1
; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h
; CHECK-GI-NEXT: sdiv w18, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: mov w17, v7.s[3]
; CHECK-GI-NEXT: sshll2 v5.4s, v6.8h, #0
; CHECK-GI-NEXT: sshll2 v7.4s, v4.8h, #0
; CHECK-GI-NEXT: mov v22.s[1], w0
; CHECK-GI-NEXT: sshll v6.4s, v6.4h, #0
; CHECK-GI-NEXT: sshll v4.4s, v4.4h, #0
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w4, v7.s[3]
; CHECK-GI-NEXT: sdiv w16, w16, w17
; CHECK-GI-NEXT: fmov w17, s5
; CHECK-GI-NEXT: mov v22.s[2], w18
; CHECK-GI-NEXT: sdiv w5, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[1]
; CHECK-GI-NEXT: mov w2, v7.s[1]
; CHECK-GI-NEXT: mov v22.s[3], w16
; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s
; CHECK-GI-NEXT: sdiv w3, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[2]
; CHECK-GI-NEXT: mov w2, v7.s[2]
; CHECK-GI-NEXT: mov v23.s[0], w5
; CHECK-GI-NEXT: sdiv w2, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[3]
; CHECK-GI-NEXT: mov v23.s[1], w3
; CHECK-GI-NEXT: sdiv w17, w17, w4
; CHECK-GI-NEXT: fmov w4, s18
; CHECK-GI-NEXT: mov v23.s[2], w2
; CHECK-GI-NEXT: sdiv w20, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[1]
; CHECK-GI-NEXT: mov w6, v19.s[1]
; CHECK-GI-NEXT: mov v23.s[3], w17
; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s
; CHECK-GI-NEXT: sdiv w19, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[2]
; CHECK-GI-NEXT: mov w6, v19.s[2]
; CHECK-GI-NEXT: mov v24.s[0], w20
; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: sdiv w6, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[3]
; CHECK-GI-NEXT: mov v24.s[1], w19
; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: sdiv w4, w4, w7
; CHECK-GI-NEXT: fmov w7, s16
; CHECK-GI-NEXT: mov v24.s[2], w6
; CHECK-GI-NEXT: sdiv w24, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[1]
; CHECK-GI-NEXT: mov w21, v17.s[1]
; CHECK-GI-NEXT: mov v24.s[3], w4
; CHECK-GI-NEXT: sdiv w22, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[2]
; CHECK-GI-NEXT: mov w21, v17.s[2]
; CHECK-GI-NEXT: sshll2 v17.8h, v1.16b, #0
; CHECK-GI-NEXT: mov v25.s[0], w24
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: sshll v18.4s, v17.4h, #0
; CHECK-GI-NEXT: sshll v29.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: sdiv w21, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[3]
; CHECK-GI-NEXT: sshll2 v16.8h, v3.16b, #0
; CHECK-GI-NEXT: mov v25.s[1], w22
; CHECK-GI-NEXT: sshll v3.8h, v3.8b, #0
; CHECK-GI-NEXT: sshll v19.4s, v16.4h, #0
; CHECK-GI-NEXT: sshll v31.4s, v3.4h, #0
; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
; CHECK-GI-NEXT: fmov w25, s19
; CHECK-GI-NEXT: mov w26, v19.s[1]
; CHECK-GI-NEXT: mov w27, v19.s[2]
; CHECK-GI-NEXT: mov w28, v19.s[3]
; CHECK-GI-NEXT: sshll2 v19.4s, v16.8h, #0
; CHECK-GI-NEXT: sshll v16.4s, v16.4h, #0
; CHECK-GI-NEXT: sdiv w7, w7, w23
; CHECK-GI-NEXT: fmov w23, s18
; CHECK-GI-NEXT: mov v25.s[2], w21
; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s
; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov w29, s19
; CHECK-GI-NEXT: mov w30, v19.s[1]
; CHECK-GI-NEXT: mov w15, v19.s[2]
; CHECK-GI-NEXT: sdiv w25, w23, w25
; CHECK-GI-NEXT: mov w23, v18.s[1]
; CHECK-GI-NEXT: mov v25.s[3], w7
; CHECK-GI-NEXT: mls v1.4s, v25.4s, v3.4s
; CHECK-GI-NEXT: sdiv w26, w23, w26
; CHECK-GI-NEXT: mov w23, v18.s[2]
; CHECK-GI-NEXT: mov v26.s[0], w25
; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h
; CHECK-GI-NEXT: sdiv w27, w23, w27
; CHECK-GI-NEXT: mov w23, v18.s[3]
; CHECK-GI-NEXT: sshll2 v18.4s, v17.8h, #0
; CHECK-GI-NEXT: mov v26.s[1], w26
; CHECK-GI-NEXT: sshll v17.4s, v17.4h, #0
; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov w11, v18.s[2]
; CHECK-GI-NEXT: mov w9, v18.s[3]
; CHECK-GI-NEXT: sdiv w23, w23, w28
; CHECK-GI-NEXT: fmov w28, s18
; CHECK-GI-NEXT: mov v26.s[2], w27
; CHECK-GI-NEXT: sdiv w28, w28, w29
; CHECK-GI-NEXT: mov w29, v18.s[1]
; CHECK-GI-NEXT: mov v26.s[3], w23
; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s
; CHECK-GI-NEXT: sdiv w29, w29, w30
; CHECK-GI-NEXT: mov v27.s[0], w28
; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: sdiv w10, w11, w15
; CHECK-GI-NEXT: mov w11, v19.s[3]
; CHECK-GI-NEXT: mov v27.s[1], w29
; CHECK-GI-NEXT: sdiv w8, w9, w11
; CHECK-GI-NEXT: mov v27.s[2], w10
; CHECK-GI-NEXT: mov v27.s[3], w8
; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s
; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = srem <32 x i8> %d, %e
ret <32 x i8> %s
}
; uv2i8: unsigned remainder (urem) of two <2 x i8> vectors.
; Both selectors mask every 32-bit lane with 0xff and issue one scalar udiv per
; lane. SelectionDAG forms each remainder with a scalar msub and re-inserts it
; into v0; GlobalISel rebuilds the quotient vector in v2 and applies a single
; vector mls.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: uv2i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = urem <2 x i8> %d, %e
ret <2 x i8> %s
}
; uv3i8: unsigned remainder (urem) of two <3 x i8> vectors.
; In the expected output the six i8 elements are read from w0-w5 and the three
; remainders are written to w0-w2; no vector registers are involved. Both
; selectors emit the same shape (mask with 0xff, udiv, msub per element) and
; differ only in which scratch register holds the dividend vs. the divisor.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
define <3 x i8> @uv3i8(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-LABEL: uv3i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w3, #0xff
; CHECK-SD-NEXT: and w9, w0, #0xff
; CHECK-SD-NEXT: and w11, w4, #0xff
; CHECK-SD-NEXT: and w12, w1, #0xff
; CHECK-SD-NEXT: and w14, w5, #0xff
; CHECK-SD-NEXT: and w15, w2, #0xff
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w1, w13, w11, w12
; CHECK-SD-NEXT: msub w2, w16, w14, w15
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: and w9, w3, #0xff
; CHECK-GI-NEXT: and w11, w1, #0xff
; CHECK-GI-NEXT: and w12, w4, #0xff
; CHECK-GI-NEXT: and w14, w2, #0xff
; CHECK-GI-NEXT: and w15, w5, #0xff
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: udiv w13, w11, w12
; CHECK-GI-NEXT: msub w0, w10, w9, w8
; CHECK-GI-NEXT: udiv w16, w14, w15
; CHECK-GI-NEXT: msub w1, w13, w12, w11
; CHECK-GI-NEXT: msub w2, w16, w15, w14
; CHECK-GI-NEXT: ret
entry:
%s = urem <3 x i8> %d, %e
ret <3 x i8> %s
}
; uv4i8: unsigned remainder (urem) of two <4 x i8> vectors.
; SelectionDAG clears the high byte of each 16-bit lane with bic, then runs a
; umov/udiv/msub sequence per lane and inserts the results into v0.h[0..3].
; GlobalISel widens to four 32-bit lanes with ushll, masks with 0xff, performs
; four scalar udivs, forms all remainders with one vector mls and narrows the
; result back with xtn.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
; CHECK-SD-NEXT: umov w11, v1.h[0]
; CHECK-SD-NEXT: umov w12, v0.h[0]
; CHECK-SD-NEXT: umov w8, v1.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w14, v1.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[2]
; CHECK-SD-NEXT: umov w17, v1.h[3]
; CHECK-SD-NEXT: umov w18, v0.h[3]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: udiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: udiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
entry:
%s = urem <4 x i8> %d, %e
ret <4 x i8> %s
}
; uv8i8: unsigned remainder (urem) of two <8 x i8> vectors.
; SelectionDAG extracts every byte lane with umov, runs eight scalar udiv/msub
; pairs, inserts the remainders into v2.b[0..7] and copies d2 to d0.
; GlobalISel zero-extends both operands to two 4 x 32-bit vectors with
; ushll/ushll2, performs eight scalar udivs, forms the remainders with two
; vector mls instructions and narrows the result with uzp1 followed by xtn.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
define <8 x i8> @uv8i8(<8 x i8> %d, <8 x i8> %e) {
; CHECK-SD-LABEL: uv8i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w11, v1.b[0]
; CHECK-SD-NEXT: umov w12, v0.b[0]
; CHECK-SD-NEXT: umov w8, v1.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w14, v1.b[2]
; CHECK-SD-NEXT: umov w15, v0.b[2]
; CHECK-SD-NEXT: umov w17, v1.b[3]
; CHECK-SD-NEXT: umov w18, v0.b[3]
; CHECK-SD-NEXT: umov w1, v1.b[4]
; CHECK-SD-NEXT: umov w2, v0.b[4]
; CHECK-SD-NEXT: umov w4, v1.b[5]
; CHECK-SD-NEXT: umov w5, v0.b[5]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: umov w13, v1.b[7]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: umov w11, v0.b[6]
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: umov w10, v1.b[6]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: umov w14, v0.b[7]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: udiv w9, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: udiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w10, v3.s[1]
; CHECK-GI-NEXT: mov w11, v3.s[2]
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s1
; CHECK-GI-NEXT: mov w14, v1.s[1]
; CHECK-GI-NEXT: mov w15, v1.s[2]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v2.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v2.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v0.s[3]
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v2.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: udiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: udiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v0.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s
; CHECK-GI-NEXT: udiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v0.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: ret
entry:
%s = urem <8 x i8> %d, %e
ret <8 x i8> %s
}
; uv16i8: unsigned remainder (urem) of two <16 x i8> vectors.
; SelectionDAG spills x19-x28 into an 80-byte frame, extracts every byte lane
; with umov, runs sixteen scalar udiv/msub pairs and inserts the remainders
; into v2.b[0..15] before moving v2 to v0.
; GlobalISel needs no stack frame in the expected output: it zero-extends the
; operands into four 4 x 32-bit vectors per side, performs sixteen scalar
; udivs, forms the remainders with four vector mls instructions and packs the
; result back to bytes with three uzp1 instructions.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
define <16 x i8> @uv16i8(<16 x i8> %d, <16 x i8> %e) {
; CHECK-SD-LABEL: uv16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: umov w11, v1.b[0]
; CHECK-SD-NEXT: umov w12, v0.b[0]
; CHECK-SD-NEXT: umov w8, v1.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w14, v1.b[2]
; CHECK-SD-NEXT: umov w15, v0.b[2]
; CHECK-SD-NEXT: umov w17, v1.b[3]
; CHECK-SD-NEXT: umov w18, v0.b[3]
; CHECK-SD-NEXT: umov w1, v1.b[4]
; CHECK-SD-NEXT: umov w2, v0.b[4]
; CHECK-SD-NEXT: umov w4, v1.b[5]
; CHECK-SD-NEXT: umov w5, v0.b[5]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: umov w7, v1.b[6]
; CHECK-SD-NEXT: umov w19, v0.b[6]
; CHECK-SD-NEXT: umov w21, v1.b[7]
; CHECK-SD-NEXT: umov w22, v0.b[7]
; CHECK-SD-NEXT: umov w24, v1.b[8]
; CHECK-SD-NEXT: umov w25, v0.b[8]
; CHECK-SD-NEXT: umov w27, v1.b[9]
; CHECK-SD-NEXT: umov w28, v0.b[9]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: umov w13, v1.b[11]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: umov w11, v0.b[10]
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: umov w10, v1.b[10]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: umov w14, v0.b[11]
; CHECK-SD-NEXT: umov w16, v1.b[12]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: umov w17, v0.b[12]
; CHECK-SD-NEXT: umov w0, v1.b[13]
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: udiv w6, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: umov w1, v0.b[13]
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: udiv w20, w19, w7
; CHECK-SD-NEXT: msub w8, w6, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: udiv w23, w22, w21
; CHECK-SD-NEXT: msub w8, w20, w7, w19
; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: udiv w26, w25, w24
; CHECK-SD-NEXT: msub w8, w23, w21, w22
; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: udiv w9, w28, w27
; CHECK-SD-NEXT: msub w8, w26, w24, w25
; CHECK-SD-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[8], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w27, w28
; CHECK-SD-NEXT: mov v2.b[9], w8
; CHECK-SD-NEXT: udiv w15, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: umov w10, v1.b[14]
; CHECK-SD-NEXT: umov w11, v0.b[14]
; CHECK-SD-NEXT: mov v2.b[10], w8
; CHECK-SD-NEXT: udiv w18, w17, w16
; CHECK-SD-NEXT: msub w8, w15, w13, w14
; CHECK-SD-NEXT: umov w13, v1.b[15]
; CHECK-SD-NEXT: umov w14, v0.b[15]
; CHECK-SD-NEXT: mov v2.b[11], w8
; CHECK-SD-NEXT: udiv w9, w1, w0
; CHECK-SD-NEXT: msub w8, w18, w16, w17
; CHECK-SD-NEXT: mov v2.b[12], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w0, w1
; CHECK-SD-NEXT: mov v2.b[13], w8
; CHECK-SD-NEXT: udiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[14], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[15], w8
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v4.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v5.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll2 v6.8h, v0.16b, #0
; CHECK-GI-NEXT: ushll2 v7.8h, v1.16b, #0
; CHECK-GI-NEXT: ushll v2.4s, v4.4h, #0
; CHECK-GI-NEXT: ushll v3.4s, v5.4h, #0
; CHECK-GI-NEXT: ushll2 v4.4s, v4.8h, #0
; CHECK-GI-NEXT: ushll2 v5.4s, v5.8h, #0
; CHECK-GI-NEXT: ushll v0.4s, v6.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v7.4h, #0
; CHECK-GI-NEXT: ushll2 v6.4s, v6.8h, #0
; CHECK-GI-NEXT: ushll2 v7.4s, v7.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: fmov w17, s1
; CHECK-GI-NEXT: mov w18, v1.s[1]
; CHECK-GI-NEXT: mov w0, v1.s[2]
; CHECK-GI-NEXT: mov w1, v1.s[3]
; CHECK-GI-NEXT: udiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[1]
; CHECK-GI-NEXT: mov w9, v3.s[1]
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w3, v7.s[1]
; CHECK-GI-NEXT: mov w4, v7.s[2]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[2]
; CHECK-GI-NEXT: mov w9, v3.s[2]
; CHECK-GI-NEXT: mov v16.s[0], w11
; CHECK-GI-NEXT: mov w11, v6.s[3]
; CHECK-GI-NEXT: udiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[3]
; CHECK-GI-NEXT: mov v16.s[1], w10
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v16.s[2], w9
; CHECK-GI-NEXT: udiv w14, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov w13, v5.s[1]
; CHECK-GI-NEXT: mov v16.s[3], w8
; CHECK-GI-NEXT: mls v2.4s, v16.4s, v3.4s
; CHECK-GI-NEXT: udiv w15, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov w13, v5.s[2]
; CHECK-GI-NEXT: mov v17.s[0], w14
; CHECK-GI-NEXT: mov w14, v7.s[3]
; CHECK-GI-NEXT: udiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: mov v17.s[1], w15
; CHECK-GI-NEXT: udiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s0
; CHECK-GI-NEXT: mov v17.s[2], w13
; CHECK-GI-NEXT: udiv w16, w16, w17
; CHECK-GI-NEXT: mov w17, v0.s[1]
; CHECK-GI-NEXT: mov v17.s[3], w12
; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s
; CHECK-GI-NEXT: udiv w17, w17, w18
; CHECK-GI-NEXT: mov w18, v0.s[2]
; CHECK-GI-NEXT: mov v18.s[0], w16
; CHECK-GI-NEXT: udiv w18, w18, w0
; CHECK-GI-NEXT: mov w0, v0.s[3]
; CHECK-GI-NEXT: mov v18.s[1], w17
; CHECK-GI-NEXT: udiv w0, w0, w1
; CHECK-GI-NEXT: fmov w1, s6
; CHECK-GI-NEXT: mov v18.s[2], w18
; CHECK-GI-NEXT: udiv w1, w1, w2
; CHECK-GI-NEXT: mov w2, v6.s[1]
; CHECK-GI-NEXT: mov v18.s[3], w0
; CHECK-GI-NEXT: mls v0.4s, v18.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v4.8h
; CHECK-GI-NEXT: udiv w2, w2, w3
; CHECK-GI-NEXT: mov w3, v6.s[2]
; CHECK-GI-NEXT: mov v19.s[0], w1
; CHECK-GI-NEXT: udiv w3, w3, w4
; CHECK-GI-NEXT: mov v19.s[1], w2
; CHECK-GI-NEXT: udiv w10, w11, w14
; CHECK-GI-NEXT: mov v19.s[2], w3
; CHECK-GI-NEXT: mov v19.s[3], w10
; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v6.8h
; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
entry:
%s = urem <16 x i8> %d, %e
ret <16 x i8> %s
}
define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) {
; CHECK-SD-LABEL: uv32i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #304
; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 304
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: umov w8, v2.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w19, v3.b[7]
; CHECK-SD-NEXT: umov w7, v1.b[7]
; CHECK-SD-NEXT: umov w6, v3.b[8]
; CHECK-SD-NEXT: umov w3, v1.b[8]
; CHECK-SD-NEXT: umov w13, v3.b[0]
; CHECK-SD-NEXT: umov w5, v3.b[1]
; CHECK-SD-NEXT: umov w0, v1.b[1]
; CHECK-SD-NEXT: umov w12, v3.b[2]
; CHECK-SD-NEXT: umov w17, v3.b[3]
; CHECK-SD-NEXT: umov w16, v1.b[3]
; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[0]
; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w15, v3.b[4]
; CHECK-SD-NEXT: umov w14, v1.b[4]
; CHECK-SD-NEXT: umov w4, v3.b[5]
; CHECK-SD-NEXT: umov w1, v1.b[5]
; CHECK-SD-NEXT: umov w2, v3.b[6]
; CHECK-SD-NEXT: umov w18, v1.b[6]
; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w21, v3.b[9]
; CHECK-SD-NEXT: umov w20, v1.b[9]
; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill
; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w11, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[2]
; CHECK-SD-NEXT: umov w9, v0.b[2]
; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[3]
; CHECK-SD-NEXT: umov w9, v0.b[3]
; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[4]
; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[4]
; CHECK-SD-NEXT: udiv w27, w0, w5
; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[5]
; CHECK-SD-NEXT: umov w9, v0.b[5]
; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[6]
; CHECK-SD-NEXT: umov w9, v0.b[6]
; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[7]
; CHECK-SD-NEXT: umov w9, v0.b[7]
; CHECK-SD-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w11, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[8]
; CHECK-SD-NEXT: umov w9, v0.b[8]
; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[9]
; CHECK-SD-NEXT: umov w9, v0.b[9]
; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[10]
; CHECK-SD-NEXT: umov w9, v0.b[10]
; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[11]
; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[11]
; CHECK-SD-NEXT: udiv w25, w16, w17
; CHECK-SD-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w11, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[12]
; CHECK-SD-NEXT: umov w9, v0.b[12]
; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[13]
; CHECK-SD-NEXT: umov w9, v0.b[13]
; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[14]
; CHECK-SD-NEXT: umov w9, v0.b[14]
; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w11, v1.b[2]
; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[15]
; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[15]
; CHECK-SD-NEXT: udiv w22, w11, w12
; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w10, v1.b[0]
; CHECK-SD-NEXT: udiv w9, w7, w19
; CHECK-SD-NEXT: udiv w8, w3, w6
; CHECK-SD-NEXT: udiv w23, w10, w13
; CHECK-SD-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w9, w8, w30, w9
; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w8, w8, w29, w30
; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w10, w23, w13, w10
; CHECK-SD-NEXT: udiv w24, w14, w15
; CHECK-SD-NEXT: msub w13, w27, w5, w0
; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[1], w9
; CHECK-SD-NEXT: msub w9, w22, w12, w11
; CHECK-SD-NEXT: umov w11, v1.b[10]
; CHECK-SD-NEXT: fmov s2, w10
; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[1], w13
; CHECK-SD-NEXT: msub w8, w8, w5, w10
; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w10, v3.b[10]
; CHECK-SD-NEXT: udiv w28, w1, w4
; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[2], w9
; CHECK-SD-NEXT: mov v0.b[2], w8
; CHECK-SD-NEXT: msub w8, w25, w17, w16
; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w12, w12, w5, w13
; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: msub w8, w24, w15, w14
; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[3], w12
; CHECK-SD-NEXT: msub w13, w13, w17, w16
; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w26, w18, w2
; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w12, v3.b[11]
; CHECK-SD-NEXT: msub w15, w15, w17, w16
; CHECK-SD-NEXT: umov w14, v1.b[11]
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: msub w8, w28, w4, w1
; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[4], w13
; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[5], w15
; CHECK-SD-NEXT: msub w16, w16, w1, w17
; CHECK-SD-NEXT: umov w15, v3.b[12]
; CHECK-SD-NEXT: msub w8, w26, w2, w18
; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w0, w20, w21
; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload
; CHECK-SD-NEXT: umov w17, v1.b[12]
; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[6], w16
; CHECK-SD-NEXT: msub w18, w18, w2, w1
; CHECK-SD-NEXT: msub w8, w8, w19, w7
; CHECK-SD-NEXT: ldp w2, w1, [sp, #104] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[7], w18
; CHECK-SD-NEXT: umov w18, v3.b[13]
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w9, w11, w10
; CHECK-SD-NEXT: msub w1, w1, w4, w2
; CHECK-SD-NEXT: umov w2, v1.b[13]
; CHECK-SD-NEXT: msub w8, w8, w6, w3
; CHECK-SD-NEXT: ldp w4, w3, [sp, #140] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[8], w1
; CHECK-SD-NEXT: mov v2.b[8], w8
; CHECK-SD-NEXT: msub w8, w0, w21, w20
; CHECK-SD-NEXT: msub w3, w3, w5, w4
; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload
; CHECK-SD-NEXT: udiv w13, w14, w12
; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[9], w8
; CHECK-SD-NEXT: mov v0.b[9], w3
; CHECK-SD-NEXT: msub w8, w9, w10, w11
; CHECK-SD-NEXT: msub w1, w1, w5, w4
; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w9, v3.b[14]
; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload
; CHECK-SD-NEXT: umov w10, v1.b[14]
; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[10], w8
; CHECK-SD-NEXT: mov v0.b[10], w1
; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w11, w11, w4, w3
; CHECK-SD-NEXT: udiv w16, w17, w15
; CHECK-SD-NEXT: msub w8, w13, w12, w14
; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[11], w11
; CHECK-SD-NEXT: umov w11, v3.b[15]
; CHECK-SD-NEXT: msub w13, w13, w1, w14
; CHECK-SD-NEXT: umov w14, v1.b[15]
; CHECK-SD-NEXT: mov v2.b[11], w8
; CHECK-SD-NEXT: mov v0.b[12], w13
; CHECK-SD-NEXT: udiv w0, w2, w18
; CHECK-SD-NEXT: msub w8, w16, w15, w17
; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[12], w8
; CHECK-SD-NEXT: msub w15, w15, w17, w16
; CHECK-SD-NEXT: ldp w17, w16, [sp, #188] // 8-byte Folded Reload
; CHECK-SD-NEXT: mov v0.b[13], w15
; CHECK-SD-NEXT: udiv w12, w10, w9
; CHECK-SD-NEXT: msub w8, w0, w18, w2
; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w16, w16, w18, w17
; CHECK-SD-NEXT: mov v2.b[13], w8
; CHECK-SD-NEXT: mov v0.b[14], w16
; CHECK-SD-NEXT: udiv w13, w14, w11
; CHECK-SD-NEXT: msub w8, w12, w9, w10
; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[14], w8
; CHECK-SD-NEXT: msub w9, w9, w12, w10
; CHECK-SD-NEXT: mov v0.b[15], w9
; CHECK-SD-NEXT: msub w8, w13, w11, w14
; CHECK-SD-NEXT: mov v2.b[15], w8
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: add sp, sp, #304
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv32i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -88
; CHECK-GI-NEXT: .cfi_offset w29, -96
; CHECK-GI-NEXT: ushll v4.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v5.8h, v2.8b, #0
; CHECK-GI-NEXT: ushll v16.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll v17.8h, v3.8b, #0
; CHECK-GI-NEXT: ushll v6.4s, v4.4h, #0
; CHECK-GI-NEXT: ushll v7.4s, v5.4h, #0
; CHECK-GI-NEXT: ushll2 v4.4s, v4.8h, #0
; CHECK-GI-NEXT: ushll2 v5.4s, v5.8h, #0
; CHECK-GI-NEXT: ushll v18.4s, v16.4h, #0
; CHECK-GI-NEXT: ushll v19.4s, v17.4h, #0
; CHECK-GI-NEXT: ushll2 v16.4s, v16.8h, #0
; CHECK-GI-NEXT: ushll2 v17.4s, v17.8h, #0
; CHECK-GI-NEXT: fmov w8, s6
; CHECK-GI-NEXT: fmov w9, s7
; CHECK-GI-NEXT: mov w12, v7.s[3]
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: fmov w6, s19
; CHECK-GI-NEXT: mov w7, v19.s[3]
; CHECK-GI-NEXT: fmov w21, s17
; CHECK-GI-NEXT: mov w23, v17.s[3]
; CHECK-GI-NEXT: udiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[1]
; CHECK-GI-NEXT: mov w9, v7.s[1]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[2]
; CHECK-GI-NEXT: mov w9, v7.s[2]
; CHECK-GI-NEXT: mov v20.s[0], w11
; CHECK-GI-NEXT: udiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v6.s[3]
; CHECK-GI-NEXT: ushll2 v6.8h, v0.16b, #0
; CHECK-GI-NEXT: mov v20.s[1], w10
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v28.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v20.s[2], w9
; CHECK-GI-NEXT: udiv w15, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov w13, v5.s[1]
; CHECK-GI-NEXT: mov v20.s[3], w8
; CHECK-GI-NEXT: udiv w14, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov w13, v5.s[2]
; CHECK-GI-NEXT: ushll v5.4s, v6.4h, #0
; CHECK-GI-NEXT: mov v21.s[0], w15
; CHECK-GI-NEXT: udiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: ushll2 v4.8h, v2.16b, #0
; CHECK-GI-NEXT: mov v21.s[1], w14
; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-GI-NEXT: ushll v7.4s, v4.4h, #0
; CHECK-GI-NEXT: ushll v30.4s, v2.4h, #0
; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-GI-NEXT: fmov w17, s7
; CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s
; CHECK-GI-NEXT: udiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s5
; CHECK-GI-NEXT: mov v21.s[2], w13
; CHECK-GI-NEXT: udiv w1, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[1]
; CHECK-GI-NEXT: mov w17, v7.s[1]
; CHECK-GI-NEXT: mov v21.s[3], w12
; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s
; CHECK-GI-NEXT: udiv w0, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[2]
; CHECK-GI-NEXT: mov w17, v7.s[2]
; CHECK-GI-NEXT: mov v22.s[0], w1
; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h
; CHECK-GI-NEXT: udiv w18, w16, w17
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: mov w17, v7.s[3]
; CHECK-GI-NEXT: ushll2 v5.4s, v6.8h, #0
; CHECK-GI-NEXT: ushll2 v7.4s, v4.8h, #0
; CHECK-GI-NEXT: mov v22.s[1], w0
; CHECK-GI-NEXT: ushll v6.4s, v6.4h, #0
; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w4, v7.s[3]
; CHECK-GI-NEXT: udiv w16, w16, w17
; CHECK-GI-NEXT: fmov w17, s5
; CHECK-GI-NEXT: mov v22.s[2], w18
; CHECK-GI-NEXT: udiv w5, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[1]
; CHECK-GI-NEXT: mov w2, v7.s[1]
; CHECK-GI-NEXT: mov v22.s[3], w16
; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s
; CHECK-GI-NEXT: udiv w3, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[2]
; CHECK-GI-NEXT: mov w2, v7.s[2]
; CHECK-GI-NEXT: mov v23.s[0], w5
; CHECK-GI-NEXT: udiv w2, w17, w2
; CHECK-GI-NEXT: mov w17, v5.s[3]
; CHECK-GI-NEXT: mov v23.s[1], w3
; CHECK-GI-NEXT: udiv w17, w17, w4
; CHECK-GI-NEXT: fmov w4, s18
; CHECK-GI-NEXT: mov v23.s[2], w2
; CHECK-GI-NEXT: udiv w20, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[1]
; CHECK-GI-NEXT: mov w6, v19.s[1]
; CHECK-GI-NEXT: mov v23.s[3], w17
; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s
; CHECK-GI-NEXT: udiv w19, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[2]
; CHECK-GI-NEXT: mov w6, v19.s[2]
; CHECK-GI-NEXT: mov v24.s[0], w20
; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h
; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: udiv w6, w4, w6
; CHECK-GI-NEXT: mov w4, v18.s[3]
; CHECK-GI-NEXT: mov v24.s[1], w19
; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: udiv w4, w4, w7
; CHECK-GI-NEXT: fmov w7, s16
; CHECK-GI-NEXT: mov v24.s[2], w6
; CHECK-GI-NEXT: udiv w24, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[1]
; CHECK-GI-NEXT: mov w21, v17.s[1]
; CHECK-GI-NEXT: mov v24.s[3], w4
; CHECK-GI-NEXT: udiv w22, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[2]
; CHECK-GI-NEXT: mov w21, v17.s[2]
; CHECK-GI-NEXT: ushll2 v17.8h, v1.16b, #0
; CHECK-GI-NEXT: mov v25.s[0], w24
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll v18.4s, v17.4h, #0
; CHECK-GI-NEXT: ushll v29.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: udiv w21, w7, w21
; CHECK-GI-NEXT: mov w7, v16.s[3]
; CHECK-GI-NEXT: ushll2 v16.8h, v3.16b, #0
; CHECK-GI-NEXT: mov v25.s[1], w22
; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0
; CHECK-GI-NEXT: ushll v19.4s, v16.4h, #0
; CHECK-GI-NEXT: ushll v31.4s, v3.4h, #0
; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-GI-NEXT: fmov w25, s19
; CHECK-GI-NEXT: mov w26, v19.s[1]
; CHECK-GI-NEXT: mov w27, v19.s[2]
; CHECK-GI-NEXT: mov w28, v19.s[3]
; CHECK-GI-NEXT: ushll2 v19.4s, v16.8h, #0
; CHECK-GI-NEXT: ushll v16.4s, v16.4h, #0
; CHECK-GI-NEXT: udiv w7, w7, w23
; CHECK-GI-NEXT: fmov w23, s18
; CHECK-GI-NEXT: mov v25.s[2], w21
; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s
; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov w29, s19
; CHECK-GI-NEXT: mov w30, v19.s[1]
; CHECK-GI-NEXT: mov w15, v19.s[2]
; CHECK-GI-NEXT: udiv w25, w23, w25
; CHECK-GI-NEXT: mov w23, v18.s[1]
; CHECK-GI-NEXT: mov v25.s[3], w7
; CHECK-GI-NEXT: mls v1.4s, v25.4s, v3.4s
; CHECK-GI-NEXT: udiv w26, w23, w26
; CHECK-GI-NEXT: mov w23, v18.s[2]
; CHECK-GI-NEXT: mov v26.s[0], w25
; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h
; CHECK-GI-NEXT: udiv w27, w23, w27
; CHECK-GI-NEXT: mov w23, v18.s[3]
; CHECK-GI-NEXT: ushll2 v18.4s, v17.8h, #0
; CHECK-GI-NEXT: mov v26.s[1], w26
; CHECK-GI-NEXT: ushll v17.4s, v17.4h, #0
; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov w11, v18.s[2]
; CHECK-GI-NEXT: mov w9, v18.s[3]
; CHECK-GI-NEXT: udiv w23, w23, w28
; CHECK-GI-NEXT: fmov w28, s18
; CHECK-GI-NEXT: mov v26.s[2], w27
; CHECK-GI-NEXT: udiv w28, w28, w29
; CHECK-GI-NEXT: mov w29, v18.s[1]
; CHECK-GI-NEXT: mov v26.s[3], w23
; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s
; CHECK-GI-NEXT: udiv w29, w29, w30
; CHECK-GI-NEXT: mov v27.s[0], w28
; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: udiv w10, w11, w15
; CHECK-GI-NEXT: mov w11, v19.s[3]
; CHECK-GI-NEXT: mov v27.s[1], w29
; CHECK-GI-NEXT: udiv w8, w9, w11
; CHECK-GI-NEXT: mov v27.s[2], w10
; CHECK-GI-NEXT: mov v27.s[3], w8
; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s
; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = urem <32 x i8> %d, %e
ret <32 x i8> %s
}
; Signed remainder (srem) of two <2 x i16> vectors.
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; Both runs sign-extend each lane in place with shl/sshr by 16 and divide the
; lanes with scalar sdiv. The default run finishes each lane with a scalar
; msub, the global-isel run packs the quotients and uses one vector mls.
define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
; CHECK-SD-LABEL: sv2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #16
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-GI-NEXT: shl v1.2s, v1.2s, #16
; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #16
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i16> %d, %e
ret <2 x i16> %s
}
; Signed remainder (srem) of two <3 x i16> vectors (non-power-of-two length).
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; Both runs fully scalarise the operation. Each of the three lanes is read
; with smov, divided with sdiv, reduced with msub and written back into an
; h lane of v0.
define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
; CHECK-SD-LABEL: sv3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: smov w11, v1.h[0]
; CHECK-SD-NEXT: smov w12, v0.h[0]
; CHECK-SD-NEXT: smov w8, v1.h[1]
; CHECK-SD-NEXT: smov w9, v0.h[1]
; CHECK-SD-NEXT: smov w14, v1.h[2]
; CHECK-SD-NEXT: smov w15, v0.h[2]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: smov w8, v0.h[0]
; CHECK-GI-NEXT: smov w9, v1.h[0]
; CHECK-GI-NEXT: smov w11, v0.h[1]
; CHECK-GI-NEXT: smov w12, v1.h[1]
; CHECK-GI-NEXT: smov w14, v0.h[2]
; CHECK-GI-NEXT: smov w15, v1.h[2]
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: sdiv w13, w11, w12
; CHECK-GI-NEXT: msub w8, w10, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: sdiv w16, w14, w15
; CHECK-GI-NEXT: msub w9, w13, w12, w11
; CHECK-GI-NEXT: mov v0.h[1], w9
; CHECK-GI-NEXT: msub w8, w16, w15, w14
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = srem <3 x i16> %d, %e
ret <3 x i16> %s
}
; Signed remainder (srem) of two <4 x i16> vectors.
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; The default run scalarises every lane (smov, sdiv, msub, lane insert).
; The global-isel run widens both operands to 32-bit lanes with sshll, does
; four scalar sdiv, applies one vector mls and narrows the result with xtn.
define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
; CHECK-SD-LABEL: sv4i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: smov w11, v1.h[0]
; CHECK-SD-NEXT: smov w12, v0.h[0]
; CHECK-SD-NEXT: smov w8, v1.h[1]
; CHECK-SD-NEXT: smov w9, v0.h[1]
; CHECK-SD-NEXT: smov w14, v1.h[2]
; CHECK-SD-NEXT: smov w15, v0.h[2]
; CHECK-SD-NEXT: smov w17, v1.h[3]
; CHECK-SD-NEXT: smov w18, v0.h[3]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: sdiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: sdiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
entry:
%s = srem <4 x i16> %d, %e
ret <4 x i16> %s
}
; Signed remainder (srem) of two <8 x i16> vectors (one full 128-bit register
; per operand).
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; The default run scalarises all eight lanes and assembles the result in v2
; before copying it to v0. The global-isel run widens the low and high
; halves with sshll and sshll2, runs one vector mls per half and recombines
; the halves with uzp1.
define <8 x i16> @sv8i16(<8 x i16> %d, <8 x i16> %e) {
; CHECK-SD-LABEL: sv8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: smov w11, v1.h[0]
; CHECK-SD-NEXT: smov w12, v0.h[0]
; CHECK-SD-NEXT: smov w8, v1.h[1]
; CHECK-SD-NEXT: smov w9, v0.h[1]
; CHECK-SD-NEXT: smov w14, v1.h[2]
; CHECK-SD-NEXT: smov w15, v0.h[2]
; CHECK-SD-NEXT: smov w17, v1.h[3]
; CHECK-SD-NEXT: smov w18, v0.h[3]
; CHECK-SD-NEXT: smov w1, v1.h[4]
; CHECK-SD-NEXT: smov w2, v0.h[4]
; CHECK-SD-NEXT: smov w4, v1.h[5]
; CHECK-SD-NEXT: smov w5, v0.h[5]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: smov w13, v1.h[7]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: smov w11, v0.h[6]
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: smov w10, v1.h[6]
; CHECK-SD-NEXT: mov v2.h[1], w8
; CHECK-SD-NEXT: sdiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: smov w14, v0.h[7]
; CHECK-SD-NEXT: mov v2.h[2], w8
; CHECK-SD-NEXT: sdiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: mov v2.h[3], w8
; CHECK-SD-NEXT: sdiv w9, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: mov v2.h[4], w8
; CHECK-SD-NEXT: sdiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w4, w5
; CHECK-SD-NEXT: mov v2.h[5], w8
; CHECK-SD-NEXT: sdiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.h[6], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.h[7], w8
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w10, v3.s[1]
; CHECK-GI-NEXT: mov w11, v3.s[2]
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s1
; CHECK-GI-NEXT: mov w14, v1.s[1]
; CHECK-GI-NEXT: mov w15, v1.s[2]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v2.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v2.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v0.s[3]
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v2.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: sdiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: sdiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v0.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s
; CHECK-GI-NEXT: sdiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v0.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: sdiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%s = srem <8 x i16> %d, %e
ret <8 x i16> %s
}
; Signed remainder (srem) of two <16 x i16> vectors (two 128-bit registers
; per operand, %d in v0/v1 and %e in v2/v3 as the lane reads below show).
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; The default run scalarises all sixteen lanes, which needs a 160-byte frame
; to save x19-x30 and to spill several 32-bit intermediates. The global-isel
; run needs no frame. It widens each quarter with sshll/sshll2, performs the
; scalar sdiv per lane, applies one vector mls per quarter and recombines
; the quarters with uzp1.
define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) {
; CHECK-SD-LABEL: sv16i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #160
; CHECK-SD-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: smov w8, v2.h[1]
; CHECK-SD-NEXT: smov w9, v0.h[1]
; CHECK-SD-NEXT: smov w19, v2.h[2]
; CHECK-SD-NEXT: smov w22, v0.h[2]
; CHECK-SD-NEXT: smov w1, v2.h[0]
; CHECK-SD-NEXT: smov w3, v0.h[0]
; CHECK-SD-NEXT: smov w7, v2.h[3]
; CHECK-SD-NEXT: smov w18, v0.h[3]
; CHECK-SD-NEXT: smov w4, v0.h[6]
; CHECK-SD-NEXT: smov w0, v2.h[4]
; CHECK-SD-NEXT: smov w5, v0.h[4]
; CHECK-SD-NEXT: smov w2, v2.h[7]
; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill
; CHECK-SD-NEXT: smov w6, v0.h[7]
; CHECK-SD-NEXT: smov w27, v3.h[0]
; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w9, w9, w8
; CHECK-SD-NEXT: smov w28, v1.h[0]
; CHECK-SD-NEXT: smov w24, v3.h[1]
; CHECK-SD-NEXT: smov w25, v1.h[1]
; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload
; CHECK-SD-NEXT: smov w30, v3.h[2]
; CHECK-SD-NEXT: smov w12, v3.h[3]
; CHECK-SD-NEXT: smov w11, v1.h[3]
; CHECK-SD-NEXT: smov w14, v3.h[5]
; CHECK-SD-NEXT: smov w13, v1.h[5]
; CHECK-SD-NEXT: sdiv w8, w22, w19
; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill
; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w21, w20, w21, w23
; CHECK-SD-NEXT: sdiv w9, w3, w1
; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w8, w18, w7
; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w8, v2.h[5]
; CHECK-SD-NEXT: smov w9, v0.h[5]
; CHECK-SD-NEXT: sdiv w10, w5, w0
; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w1, w20, w1, w3
; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, w1
; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w1, w1, w19, w22
; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload
; CHECK-SD-NEXT: sdiv w9, w9, w8
; CHECK-SD-NEXT: smov w8, v2.h[6]
; CHECK-SD-NEXT: mov v0.h[1], w21
; CHECK-SD-NEXT: msub w18, w19, w7, w18
; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.h[2], w1
; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill
; CHECK-SD-NEXT: sdiv w9, w4, w8
; CHECK-SD-NEXT: mov v0.h[3], w18
; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload
; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: sdiv w8, w6, w2
; CHECK-SD-NEXT: smov w9, v1.h[4]
; CHECK-SD-NEXT: sdiv w29, w28, w27
; CHECK-SD-NEXT: stp w8, w10, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: smov w8, v1.h[2]
; CHECK-SD-NEXT: smov w10, v3.h[4]
; CHECK-SD-NEXT: sdiv w26, w25, w24
; CHECK-SD-NEXT: msub w3, w29, w27, w28
; CHECK-SD-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov s2, w3
; CHECK-SD-NEXT: smov w3, v1.h[6]
; CHECK-SD-NEXT: sdiv w15, w8, w30
; CHECK-SD-NEXT: msub w24, w26, w24, w25
; CHECK-SD-NEXT: mov v2.h[1], w24
; CHECK-SD-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: sdiv w17, w11, w12
; CHECK-SD-NEXT: msub w8, w15, w30, w8
; CHECK-SD-NEXT: smov w15, v3.h[6]
; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[2], w8
; CHECK-SD-NEXT: sdiv w16, w9, w10
; CHECK-SD-NEXT: msub w8, w17, w12, w11
; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w12, w12, w0, w5
; CHECK-SD-NEXT: mov v2.h[3], w8
; CHECK-SD-NEXT: mov v0.h[4], w12
; CHECK-SD-NEXT: sdiv w25, w13, w14
; CHECK-SD-NEXT: msub w8, w16, w10, w9
; CHECK-SD-NEXT: smov w9, v3.h[7]
; CHECK-SD-NEXT: smov w10, v1.h[7]
; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[4], w8
; CHECK-SD-NEXT: msub w16, w16, w17, w18
; CHECK-SD-NEXT: mov v0.h[5], w16
; CHECK-SD-NEXT: sdiv w11, w3, w15
; CHECK-SD-NEXT: msub w8, w25, w14, w13
; CHECK-SD-NEXT: ldp w14, w13, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[5], w8
; CHECK-SD-NEXT: msub w13, w13, w14, w4
; CHECK-SD-NEXT: mov v0.h[6], w13
; CHECK-SD-NEXT: sdiv w12, w10, w9
; CHECK-SD-NEXT: msub w8, w11, w15, w3
; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w11, w11, w2, w6
; CHECK-SD-NEXT: mov v2.h[6], w8
; CHECK-SD-NEXT: mov v0.h[7], w11
; CHECK-SD-NEXT: msub w8, w12, w9, w10
; CHECK-SD-NEXT: mov v2.h[7], w8
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: add sp, sp, #160
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv16i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v4.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll v5.4s, v2.4h, #0
; CHECK-GI-NEXT: sshll v6.4s, v1.4h, #0
; CHECK-GI-NEXT: sshll v7.4s, v3.4h, #0
; CHECK-GI-NEXT: fmov w8, s4
; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov w12, v5.s[3]
; CHECK-GI-NEXT: fmov w17, s7
; CHECK-GI-NEXT: mov w18, v7.s[1]
; CHECK-GI-NEXT: mov w0, v7.s[2]
; CHECK-GI-NEXT: mov w1, v7.s[3]
; CHECK-GI-NEXT: sshll2 v7.4s, v3.8h, #0
; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0
; CHECK-GI-NEXT: sdiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[1]
; CHECK-GI-NEXT: mov w9, v5.s[1]
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w3, v7.s[1]
; CHECK-GI-NEXT: mov w4, v7.s[2]
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[2]
; CHECK-GI-NEXT: mov w9, v5.s[2]
; CHECK-GI-NEXT: sshll2 v5.4s, v2.8h, #0
; CHECK-GI-NEXT: mov v16.s[0], w11
; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w14, v5.s[1]
; CHECK-GI-NEXT: mov w15, v5.s[2]
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: sdiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[3]
; CHECK-GI-NEXT: sshll2 v4.4s, v0.8h, #0
; CHECK-GI-NEXT: mov v16.s[1], w10
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v16.s[2], w9
; CHECK-GI-NEXT: sdiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov v16.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s
; CHECK-GI-NEXT: sdiv w14, w12, w14
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov v17.s[0], w13
; CHECK-GI-NEXT: mov w13, v7.s[3]
; CHECK-GI-NEXT: sdiv w15, w12, w15
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: mov v17.s[1], w14
; CHECK-GI-NEXT: sdiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s6
; CHECK-GI-NEXT: mov v17.s[2], w15
; CHECK-GI-NEXT: sdiv w16, w16, w17
; CHECK-GI-NEXT: mov w17, v6.s[1]
; CHECK-GI-NEXT: mov v17.s[3], w12
; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s
; CHECK-GI-NEXT: sdiv w17, w17, w18
; CHECK-GI-NEXT: mov w18, v6.s[2]
; CHECK-GI-NEXT: mov v18.s[0], w16
; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h
; CHECK-GI-NEXT: sdiv w18, w18, w0
; CHECK-GI-NEXT: mov w0, v6.s[3]
; CHECK-GI-NEXT: sshll2 v6.4s, v1.8h, #0
; CHECK-GI-NEXT: mov v18.s[1], w17
; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: mov w11, v6.s[3]
; CHECK-GI-NEXT: sdiv w0, w0, w1
; CHECK-GI-NEXT: fmov w1, s6
; CHECK-GI-NEXT: mov v18.s[2], w18
; CHECK-GI-NEXT: sdiv w1, w1, w2
; CHECK-GI-NEXT: mov w2, v6.s[1]
; CHECK-GI-NEXT: mov v18.s[3], w0
; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s
; CHECK-GI-NEXT: sdiv w2, w2, w3
; CHECK-GI-NEXT: mov w3, v6.s[2]
; CHECK-GI-NEXT: mov v19.s[0], w1
; CHECK-GI-NEXT: sdiv w3, w3, w4
; CHECK-GI-NEXT: mov v19.s[1], w2
; CHECK-GI-NEXT: sdiv w10, w11, w13
; CHECK-GI-NEXT: mov v19.s[2], w3
; CHECK-GI-NEXT: mov v19.s[3], w10
; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s
; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h
; CHECK-GI-NEXT: ret
entry:
%s = srem <16 x i16> %d, %e
ret <16 x i16> %s
}
; Unsigned remainder (urem) of two <2 x i16> vectors.
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; Both runs zero-extend the lanes by masking with the 0x0000ffff-per-lane
; constant built by movi, then divide the lanes with scalar udiv. The default
; run finishes each lane with a scalar msub, the global-isel run packs the
; quotients and uses one vector mls.
define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
; CHECK-SD-LABEL: uv2i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = urem <2 x i16> %d, %e
ret <2 x i16> %s
}
; Unsigned remainder (urem) of two <3 x i16> vectors (non-power-of-two length).
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; Both runs scalarise the three lanes with umov, udiv and msub.
; NOTE(review) the default-run sequence below reads lane 0 of each operand
; twice and passes every remainder through sxth before the lane insert,
; unlike the sv3i16 and uv4i16 sequences in this file. Presumably this is
; simply what llc emitted when the assertions were generated. Confirm by
; re-running utils/update_llc_test_checks.py rather than editing by hand.
define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
; CHECK-SD-LABEL: uv3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w11, v1.h[0]
; CHECK-SD-NEXT: umov w12, v0.h[0]
; CHECK-SD-NEXT: umov w8, v1.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w13, v0.h[2]
; CHECK-SD-NEXT: umov w14, v1.h[0]
; CHECK-SD-NEXT: umov w16, v0.h[0]
; CHECK-SD-NEXT: udiv w11, w12, w11
; CHECK-SD-NEXT: umov w12, v1.h[2]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w11, w14, w16
; CHECK-SD-NEXT: udiv w15, w13, w12
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: sxth w9, w11
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: sxth w8, w8
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: msub w10, w15, w12, w13
; CHECK-SD-NEXT: sxth w8, w10
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: umov w8, v0.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[0]
; CHECK-GI-NEXT: umov w11, v0.h[1]
; CHECK-GI-NEXT: umov w12, v1.h[1]
; CHECK-GI-NEXT: umov w14, v0.h[2]
; CHECK-GI-NEXT: umov w15, v1.h[2]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: udiv w13, w11, w12
; CHECK-GI-NEXT: msub w8, w10, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: udiv w16, w14, w15
; CHECK-GI-NEXT: msub w9, w13, w12, w11
; CHECK-GI-NEXT: mov v0.h[1], w9
; CHECK-GI-NEXT: msub w8, w16, w15, w14
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = urem <3 x i16> %d, %e
ret <3 x i16> %s
}
; Unsigned remainder (urem) of two <4 x i16> vectors.
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; The default run scalarises every lane (umov, udiv, msub, lane insert).
; The global-isel run widens both operands to 32-bit lanes with ushll, does
; four scalar udiv, applies one vector mls and narrows the result with xtn.
define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
; CHECK-SD-LABEL: uv4i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w11, v1.h[0]
; CHECK-SD-NEXT: umov w12, v0.h[0]
; CHECK-SD-NEXT: umov w8, v1.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w14, v1.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[2]
; CHECK-SD-NEXT: umov w17, v1.h[3]
; CHECK-SD-NEXT: umov w18, v0.h[3]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: udiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: udiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
entry:
%s = urem <4 x i16> %d, %e
ret <4 x i16> %s
}
; Unsigned remainder (urem) of two <8 x i16> vectors (one full 128-bit
; register per operand).
; The assertions are autogenerated llc output for the default run (SD prefix)
; and the -global-isel run (GI prefix); regenerate them, do not hand-edit.
; The default run scalarises all eight lanes and assembles the result in v2
; before copying it to v0. The global-isel run widens the low and high
; halves with ushll and ushll2, runs one vector mls per half and recombines
; the halves with uzp1.
define <8 x i16> @uv8i16(<8 x i16> %d, <8 x i16> %e) {
; CHECK-SD-LABEL: uv8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: umov w11, v1.h[0]
; CHECK-SD-NEXT: umov w12, v0.h[0]
; CHECK-SD-NEXT: umov w8, v1.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w14, v1.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[2]
; CHECK-SD-NEXT: umov w17, v1.h[3]
; CHECK-SD-NEXT: umov w18, v0.h[3]
; CHECK-SD-NEXT: umov w1, v1.h[4]
; CHECK-SD-NEXT: umov w2, v0.h[4]
; CHECK-SD-NEXT: umov w4, v1.h[5]
; CHECK-SD-NEXT: umov w5, v0.h[5]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: umov w13, v1.h[7]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: umov w11, v0.h[6]
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: umov w10, v1.h[6]
; CHECK-SD-NEXT: mov v2.h[1], w8
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: umov w14, v0.h[7]
; CHECK-SD-NEXT: mov v2.h[2], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: mov v2.h[3], w8
; CHECK-SD-NEXT: udiv w9, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: mov v2.h[4], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w4, w5
; CHECK-SD-NEXT: mov v2.h[5], w8
; CHECK-SD-NEXT: udiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.h[6], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.h[7], w8
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w10, v3.s[1]
; CHECK-GI-NEXT: mov w11, v3.s[2]
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s1
; CHECK-GI-NEXT: mov w14, v1.s[1]
; CHECK-GI-NEXT: mov w15, v1.s[2]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v2.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v2.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v0.s[3]
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v2.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: udiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: udiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v0.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s
; CHECK-GI-NEXT: udiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v0.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: ret
entry:
%s = urem <8 x i16> %d, %e
ret <8 x i16> %s
}
; urem on <16 x i16> (dividend in v0/v1, divisor in v2/v3).
; Default llc run (SD prefix): per-lane umov + udiv + msub; the expected code
; allocates a 160-byte frame, saves x19-x30 and uses 4-byte spill slots for
; some lane values and quotients.
; -global-isel run (GI prefix): the inputs are widened in 4s quarters with
; ushll/ushll2, each lane is divided with scalar udiv, the remainders are
; formed with four vector mls and narrowed with two uzp1; no stack is used.
define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) {
; CHECK-SD-LABEL: uv16i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #160
; CHECK-SD-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 160
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: umov w8, v2.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w19, v2.h[2]
; CHECK-SD-NEXT: umov w22, v0.h[2]
; CHECK-SD-NEXT: umov w1, v2.h[0]
; CHECK-SD-NEXT: umov w3, v0.h[0]
; CHECK-SD-NEXT: umov w7, v2.h[3]
; CHECK-SD-NEXT: umov w18, v0.h[3]
; CHECK-SD-NEXT: umov w4, v0.h[6]
; CHECK-SD-NEXT: umov w0, v2.h[4]
; CHECK-SD-NEXT: umov w5, v0.h[4]
; CHECK-SD-NEXT: umov w2, v2.h[7]
; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w6, v0.h[7]
; CHECK-SD-NEXT: umov w27, v3.h[0]
; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w9, w9, w8
; CHECK-SD-NEXT: umov w28, v1.h[0]
; CHECK-SD-NEXT: umov w24, v3.h[1]
; CHECK-SD-NEXT: umov w25, v1.h[1]
; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w30, v3.h[2]
; CHECK-SD-NEXT: umov w12, v3.h[3]
; CHECK-SD-NEXT: umov w11, v1.h[3]
; CHECK-SD-NEXT: umov w14, v3.h[5]
; CHECK-SD-NEXT: umov w13, v1.h[5]
; CHECK-SD-NEXT: udiv w8, w22, w19
; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill
; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w21, w20, w21, w23
; CHECK-SD-NEXT: udiv w9, w3, w1
; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w8, w18, w7
; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w8, v2.h[5]
; CHECK-SD-NEXT: umov w9, v0.h[5]
; CHECK-SD-NEXT: udiv w10, w5, w0
; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w1, w20, w1, w3
; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill
; CHECK-SD-NEXT: fmov s0, w1
; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w1, w1, w19, w22
; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w9, w9, w8
; CHECK-SD-NEXT: umov w8, v2.h[6]
; CHECK-SD-NEXT: mov v0.h[1], w21
; CHECK-SD-NEXT: msub w18, w19, w7, w18
; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.h[2], w1
; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w9, w4, w8
; CHECK-SD-NEXT: mov v0.h[3], w18
; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload
; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w8, w6, w2
; CHECK-SD-NEXT: umov w9, v1.h[4]
; CHECK-SD-NEXT: udiv w29, w28, w27
; CHECK-SD-NEXT: stp w8, w10, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w8, v1.h[2]
; CHECK-SD-NEXT: umov w10, v3.h[4]
; CHECK-SD-NEXT: udiv w26, w25, w24
; CHECK-SD-NEXT: msub w3, w29, w27, w28
; CHECK-SD-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov s2, w3
; CHECK-SD-NEXT: umov w3, v1.h[6]
; CHECK-SD-NEXT: udiv w15, w8, w30
; CHECK-SD-NEXT: msub w24, w26, w24, w25
; CHECK-SD-NEXT: mov v2.h[1], w24
; CHECK-SD-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: udiv w17, w11, w12
; CHECK-SD-NEXT: msub w8, w15, w30, w8
; CHECK-SD-NEXT: umov w15, v3.h[6]
; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[2], w8
; CHECK-SD-NEXT: udiv w16, w9, w10
; CHECK-SD-NEXT: msub w8, w17, w12, w11
; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload
; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w12, w12, w0, w5
; CHECK-SD-NEXT: mov v2.h[3], w8
; CHECK-SD-NEXT: mov v0.h[4], w12
; CHECK-SD-NEXT: udiv w25, w13, w14
; CHECK-SD-NEXT: msub w8, w16, w10, w9
; CHECK-SD-NEXT: umov w9, v3.h[7]
; CHECK-SD-NEXT: umov w10, v1.h[7]
; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[4], w8
; CHECK-SD-NEXT: msub w16, w16, w17, w18
; CHECK-SD-NEXT: mov v0.h[5], w16
; CHECK-SD-NEXT: udiv w11, w3, w15
; CHECK-SD-NEXT: msub w8, w25, w14, w13
; CHECK-SD-NEXT: ldp w14, w13, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.h[5], w8
; CHECK-SD-NEXT: msub w13, w13, w14, w4
; CHECK-SD-NEXT: mov v0.h[6], w13
; CHECK-SD-NEXT: udiv w12, w10, w9
; CHECK-SD-NEXT: msub w8, w11, w15, w3
; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload
; CHECK-SD-NEXT: msub w11, w11, w2, w6
; CHECK-SD-NEXT: mov v2.h[6], w8
; CHECK-SD-NEXT: mov v0.h[7], w11
; CHECK-SD-NEXT: msub w8, w12, w9, w10
; CHECK-SD-NEXT: mov v2.h[7], w8
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: add sp, sp, #160
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv16i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v5.4s, v2.4h, #0
; CHECK-GI-NEXT: ushll v6.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0
; CHECK-GI-NEXT: fmov w8, s4
; CHECK-GI-NEXT: fmov w9, s5
; CHECK-GI-NEXT: mov w12, v5.s[3]
; CHECK-GI-NEXT: fmov w17, s7
; CHECK-GI-NEXT: mov w18, v7.s[1]
; CHECK-GI-NEXT: mov w0, v7.s[2]
; CHECK-GI-NEXT: mov w1, v7.s[3]
; CHECK-GI-NEXT: ushll2 v7.4s, v3.8h, #0
; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0
; CHECK-GI-NEXT: udiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[1]
; CHECK-GI-NEXT: mov w9, v5.s[1]
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w3, v7.s[1]
; CHECK-GI-NEXT: mov w4, v7.s[2]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[2]
; CHECK-GI-NEXT: mov w9, v5.s[2]
; CHECK-GI-NEXT: ushll2 v5.4s, v2.8h, #0
; CHECK-GI-NEXT: mov v16.s[0], w11
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w14, v5.s[1]
; CHECK-GI-NEXT: mov w15, v5.s[2]
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: udiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v4.s[3]
; CHECK-GI-NEXT: ushll2 v4.4s, v0.8h, #0
; CHECK-GI-NEXT: mov v16.s[1], w10
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v16.s[2], w9
; CHECK-GI-NEXT: udiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov v16.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s
; CHECK-GI-NEXT: udiv w14, w12, w14
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov v17.s[0], w13
; CHECK-GI-NEXT: mov w13, v7.s[3]
; CHECK-GI-NEXT: udiv w15, w12, w15
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: mov v17.s[1], w14
; CHECK-GI-NEXT: udiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s6
; CHECK-GI-NEXT: mov v17.s[2], w15
; CHECK-GI-NEXT: udiv w16, w16, w17
; CHECK-GI-NEXT: mov w17, v6.s[1]
; CHECK-GI-NEXT: mov v17.s[3], w12
; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s
; CHECK-GI-NEXT: udiv w17, w17, w18
; CHECK-GI-NEXT: mov w18, v6.s[2]
; CHECK-GI-NEXT: mov v18.s[0], w16
; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h
; CHECK-GI-NEXT: udiv w18, w18, w0
; CHECK-GI-NEXT: mov w0, v6.s[3]
; CHECK-GI-NEXT: ushll2 v6.4s, v1.8h, #0
; CHECK-GI-NEXT: mov v18.s[1], w17
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: mov w11, v6.s[3]
; CHECK-GI-NEXT: udiv w0, w0, w1
; CHECK-GI-NEXT: fmov w1, s6
; CHECK-GI-NEXT: mov v18.s[2], w18
; CHECK-GI-NEXT: udiv w1, w1, w2
; CHECK-GI-NEXT: mov w2, v6.s[1]
; CHECK-GI-NEXT: mov v18.s[3], w0
; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s
; CHECK-GI-NEXT: udiv w2, w2, w3
; CHECK-GI-NEXT: mov w3, v6.s[2]
; CHECK-GI-NEXT: mov v19.s[0], w1
; CHECK-GI-NEXT: udiv w3, w3, w4
; CHECK-GI-NEXT: mov v19.s[1], w2
; CHECK-GI-NEXT: udiv w10, w11, w13
; CHECK-GI-NEXT: mov v19.s[2], w3
; CHECK-GI-NEXT: mov v19.s[3], w10
; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s
; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h
; CHECK-GI-NEXT: ret
entry:
%s = urem <16 x i16> %d, %e
ret <16 x i16> %s
}
; srem on <2 x i32>.
; Default llc run (SD prefix): each lane is moved to a w register and the
; remainder is formed with scalar sdiv + msub.
; -global-isel run (GI prefix): the per-lane sdiv quotients are collected in
; v2 and the remainder is formed with a single vector mls on .2s lanes.
define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: sv2i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i32> %d, %e
ret <2 x i32> %s
}
; srem on the non-power-of-two vector <3 x i32>.
; In both the default llc run (SD prefix) and the -global-isel run (GI prefix)
; the three lanes are computed with scalar sdiv + msub and inserted into v0;
; the GI sequence first copies lanes 1 and 2 into s registers.
define <3 x i32> @sv3i32(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: sv3i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov w11, s1
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: mov w14, v1.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov s3, v1.s[1]
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: mov s1, v1.s[2]
; CHECK-GI-NEXT: sdiv w10, w8, w9
; CHECK-GI-NEXT: fmov w11, s2
; CHECK-GI-NEXT: fmov w12, s3
; CHECK-GI-NEXT: fmov w14, s0
; CHECK-GI-NEXT: fmov w15, s1
; CHECK-GI-NEXT: sdiv w13, w11, w12
; CHECK-GI-NEXT: msub w8, w10, w9, w8
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: sdiv w9, w14, w15
; CHECK-GI-NEXT: msub w8, w13, w12, w11
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: msub w8, w9, w15, w14
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: ret
entry:
%s = srem <3 x i32> %d, %e
ret <3 x i32> %s
}
; srem on <4 x i32>.
; Default llc run (SD prefix): each of the four lanes uses scalar sdiv + msub.
; -global-isel run (GI prefix): the per-lane sdiv quotients are collected in
; v2 and the remainder is formed with one vector mls on .4s lanes.
define <4 x i32> @sv4i32(<4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-LABEL: sv4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov w11, s1
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: mov w14, v1.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: mov w17, v1.s[3]
; CHECK-SD-NEXT: mov w18, v0.s[3]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: sdiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.s[3], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: sdiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: ret
entry:
%s = srem <4 x i32> %d, %e
ret <4 x i32> %s
}
; srem on <8 x i32> (dividend in v0/v1, divisor in v2/v3).
; Default llc run (SD prefix): per-lane scalar sdiv + msub; the expected code
; saves x19-x22 in a 32-byte frame.
; -global-isel run (GI prefix): per-lane sdiv quotients are collected in v4/v5
; and the remainders are formed with two vector mls; no stack is used.
define <8 x i32> @sv8i32(<8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-LABEL: sv8i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp x22, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: mov w8, v2.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: fmov w11, s2
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: fmov w4, s3
; CHECK-SD-NEXT: fmov w5, s1
; CHECK-SD-NEXT: mov w1, v3.s[1]
; CHECK-SD-NEXT: mov w2, v1.s[1]
; CHECK-SD-NEXT: mov w14, v2.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: mov w7, v3.s[2]
; CHECK-SD-NEXT: mov w19, v1.s[2]
; CHECK-SD-NEXT: sdiv w10, w9, w8
; CHECK-SD-NEXT: mov w17, v2.s[3]
; CHECK-SD-NEXT: mov w18, v0.s[3]
; CHECK-SD-NEXT: mov w21, v3.s[3]
; CHECK-SD-NEXT: mov w22, v1.s[3]
; CHECK-SD-NEXT: sdiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: sdiv w6, w5, w4
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: sdiv w3, w2, w1
; CHECK-SD-NEXT: msub w10, w6, w4, w5
; CHECK-SD-NEXT: fmov s1, w10
; CHECK-SD-NEXT: sdiv w16, w15, w14
; CHECK-SD-NEXT: msub w11, w3, w1, w2
; CHECK-SD-NEXT: mov v1.s[1], w11
; CHECK-SD-NEXT: sdiv w20, w19, w7
; CHECK-SD-NEXT: msub w9, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w9
; CHECK-SD-NEXT: sdiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w20, w7, w19
; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v1.s[2], w8
; CHECK-SD-NEXT: sdiv w12, w22, w21
; CHECK-SD-NEXT: msub w10, w0, w17, w18
; CHECK-SD-NEXT: mov v0.s[3], w10
; CHECK-SD-NEXT: msub w8, w12, w21, w22
; CHECK-SD-NEXT: mov v1.s[3], w8
; CHECK-SD-NEXT: ldp x22, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv8i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: mov w10, v2.s[1]
; CHECK-GI-NEXT: mov w11, v2.s[2]
; CHECK-GI-NEXT: mov w12, v2.s[3]
; CHECK-GI-NEXT: fmov w13, s3
; CHECK-GI-NEXT: mov w14, v3.s[1]
; CHECK-GI-NEXT: mov w15, v3.s[2]
; CHECK-GI-NEXT: sdiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: sdiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v1.s[3]
; CHECK-GI-NEXT: sdiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: sdiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s1
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: sdiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v1.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v0.4s, v4.4s, v2.4s
; CHECK-GI-NEXT: sdiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v1.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: sdiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: sdiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v1.4s, v5.4s, v3.4s
; CHECK-GI-NEXT: ret
entry:
%s = srem <8 x i32> %d, %e
ret <8 x i32> %s
}
; urem on <2 x i32>.
; Default llc run (SD prefix): each lane is moved to a w register and the
; remainder is formed with scalar udiv + msub.
; -global-isel run (GI prefix): the per-lane udiv quotients are collected in
; v2 and the remainder is formed with a single vector mls on .2s lanes.
define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
; CHECK-SD-LABEL: uv2i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%s = urem <2 x i32> %d, %e
ret <2 x i32> %s
}
; urem on the non-power-of-two vector <3 x i32>.
; In both the default llc run (SD prefix) and the -global-isel run (GI prefix)
; the three lanes are computed with scalar udiv + msub and inserted into v0;
; the GI sequence first copies lanes 1 and 2 into s registers.
define <3 x i32> @uv3i32(<3 x i32> %d, <3 x i32> %e) {
; CHECK-SD-LABEL: uv3i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov w11, s1
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: mov w14, v1.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov s3, v1.s[1]
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: mov s1, v1.s[2]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: fmov w11, s2
; CHECK-GI-NEXT: fmov w12, s3
; CHECK-GI-NEXT: fmov w14, s0
; CHECK-GI-NEXT: fmov w15, s1
; CHECK-GI-NEXT: udiv w13, w11, w12
; CHECK-GI-NEXT: msub w8, w10, w9, w8
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: udiv w9, w14, w15
; CHECK-GI-NEXT: msub w8, w13, w12, w11
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: msub w8, w9, w15, w14
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: ret
entry:
%s = urem <3 x i32> %d, %e
ret <3 x i32> %s
}
; urem on <4 x i32>.
; Default llc run (SD prefix): each of the four lanes uses scalar udiv + msub.
; -global-isel run (GI prefix): the per-lane udiv quotients are collected in
; v2 and the remainder is formed with one vector mls on .4s lanes.
define <4 x i32> @uv4i32(<4 x i32> %d, <4 x i32> %e) {
; CHECK-SD-LABEL: uv4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov w11, s1
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: mov w14, v1.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: mov w17, v1.s[3]
; CHECK-SD-NEXT: mov w18, v0.s[3]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: udiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.s[3], w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: udiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: ret
entry:
%s = urem <4 x i32> %d, %e
ret <4 x i32> %s
}
; urem on <8 x i32> (dividend in v0/v1, divisor in v2/v3).
; Default llc run (SD prefix): per-lane scalar udiv + msub; the expected code
; saves x19-x22 in a 32-byte frame.
; -global-isel run (GI prefix): per-lane udiv quotients are collected in v4/v5
; and the remainders are formed with two vector mls; no stack is used.
define <8 x i32> @uv8i32(<8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-LABEL: uv8i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp x22, x21, [sp, #-32]! // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: mov w8, v2.s[1]
; CHECK-SD-NEXT: mov w9, v0.s[1]
; CHECK-SD-NEXT: fmov w11, s2
; CHECK-SD-NEXT: fmov w12, s0
; CHECK-SD-NEXT: fmov w4, s3
; CHECK-SD-NEXT: fmov w5, s1
; CHECK-SD-NEXT: mov w1, v3.s[1]
; CHECK-SD-NEXT: mov w2, v1.s[1]
; CHECK-SD-NEXT: mov w14, v2.s[2]
; CHECK-SD-NEXT: mov w15, v0.s[2]
; CHECK-SD-NEXT: mov w7, v3.s[2]
; CHECK-SD-NEXT: mov w19, v1.s[2]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: mov w17, v2.s[3]
; CHECK-SD-NEXT: mov w18, v0.s[3]
; CHECK-SD-NEXT: mov w21, v3.s[3]
; CHECK-SD-NEXT: mov w22, v1.s[3]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: udiv w6, w5, w4
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: mov v0.s[1], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w10, w6, w4, w5
; CHECK-SD-NEXT: fmov s1, w10
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w11, w3, w1, w2
; CHECK-SD-NEXT: mov v1.s[1], w11
; CHECK-SD-NEXT: udiv w20, w19, w7
; CHECK-SD-NEXT: msub w9, w16, w14, w15
; CHECK-SD-NEXT: mov v0.s[2], w9
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w20, w7, w19
; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v1.s[2], w8
; CHECK-SD-NEXT: udiv w12, w22, w21
; CHECK-SD-NEXT: msub w10, w0, w17, w18
; CHECK-SD-NEXT: mov v0.s[3], w10
; CHECK-SD-NEXT: msub w8, w12, w21, w22
; CHECK-SD-NEXT: mov v1.s[3], w8
; CHECK-SD-NEXT: ldp x22, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s2
; CHECK-GI-NEXT: mov w10, v2.s[1]
; CHECK-GI-NEXT: mov w11, v2.s[2]
; CHECK-GI-NEXT: mov w12, v2.s[3]
; CHECK-GI-NEXT: fmov w13, s3
; CHECK-GI-NEXT: mov w14, v3.s[1]
; CHECK-GI-NEXT: mov w15, v3.s[2]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v1.s[3]
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: udiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s1
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: udiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v1.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v0.4s, v4.4s, v2.4s
; CHECK-GI-NEXT: udiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v1.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: udiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v1.4s, v5.4s, v3.4s
; CHECK-GI-NEXT: ret
entry:
%s = urem <8 x i32> %d, %e
ret <8 x i32> %s
}
; srem on <2 x i64>.
; Default llc run (SD prefix): each lane uses scalar sdiv + msub on x
; registers.
; -global-isel run (GI prefix): the expected code uses no mls here; the
; quotients come from per-lane sdiv, the products from per-lane scalar mul,
; and the remainder from a vector sub on .2d lanes.
define <2 x i64> @sv2i64(<2 x i64> %d, <2 x i64> %e) {
; CHECK-SD-LABEL: sv2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov x8, d1
; CHECK-SD-NEXT: fmov x9, d0
; CHECK-SD-NEXT: mov x11, v1.d[1]
; CHECK-SD-NEXT: mov x12, v0.d[1]
; CHECK-SD-NEXT: sdiv x10, x9, x8
; CHECK-SD-NEXT: sdiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: mov v0.d[1], x9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: sdiv x8, x8, x9
; CHECK-GI-NEXT: sdiv x11, x11, x10
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: mov v1.d[1], x11
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x11, x10
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i64> %d, %e
ret <2 x i64> %s
}
; srem on the non-power-of-two vector <3 x i64>; the expected code reads the
; operands from d0-d2 (dividend) and d3-d5 (divisor) and writes the result
; back to d0-d2.
; Default llc run (SD prefix): three independent scalar sdiv + msub.
; -global-isel run (GI prefix): lanes 0 and 1 are packed into q registers,
; divided per lane with sdiv, multiplied per lane with scalar mul and
; subtracted with a vector sub on .2d lanes; lane 2 uses scalar sdiv + msub.
define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
; CHECK-SD-LABEL: sv3i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov x8, d3
; CHECK-SD-NEXT: fmov x9, d0
; CHECK-SD-NEXT: fmov x11, d4
; CHECK-SD-NEXT: fmov x12, d1
; CHECK-SD-NEXT: fmov x14, d5
; CHECK-SD-NEXT: fmov x15, d2
; CHECK-SD-NEXT: sdiv x10, x9, x8
; CHECK-SD-NEXT: sdiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: sdiv x16, x15, x14
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: fmov d1, x9
; CHECK-SD-NEXT: msub x10, x16, x14, x15
; CHECK-SD-NEXT: fmov d2, x10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: fmov x10, d4
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: sdiv x8, x8, x9
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x11, d3
; CHECK-GI-NEXT: mov x14, v3.d[1]
; CHECK-GI-NEXT: sdiv x9, x9, x10
; CHECK-GI-NEXT: mov v6.d[0], x8
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: mov v6.d[1], x9
; CHECK-GI-NEXT: fmov x9, d5
; CHECK-GI-NEXT: sdiv x12, x8, x9
; CHECK-GI-NEXT: fmov x10, d6
; CHECK-GI-NEXT: mov x13, v6.d[1]
; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x11, x13, x14
; CHECK-GI-NEXT: mov v2.d[0], x10
; CHECK-GI-NEXT: mov v2.d[1], x11
; CHECK-GI-NEXT: msub x8, x12, x9, x8
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: fmov d2, x8
; CHECK-GI-NEXT: ret
entry:
%s = srem <3 x i64> %d, %e
ret <3 x i64> %s
}
; srem on <4 x i64> (dividend in v0/v1, divisor in v2/v3).
; Default llc run (SD prefix): four scalar sdiv + msub pairs on x registers.
; -global-isel run (GI prefix): per-lane sdiv quotients are multiplied back
; with per-lane scalar mul and the remainders are formed with two vector sub
; instructions on .2d lanes.
define <4 x i64> @sv4i64(<4 x i64> %d, <4 x i64> %e) {
; CHECK-SD-LABEL: sv4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov x8, v2.d[1]
; CHECK-SD-NEXT: mov x9, v0.d[1]
; CHECK-SD-NEXT: fmov x11, d2
; CHECK-SD-NEXT: fmov x12, d0
; CHECK-SD-NEXT: fmov x14, d3
; CHECK-SD-NEXT: fmov x15, d1
; CHECK-SD-NEXT: mov x17, v3.d[1]
; CHECK-SD-NEXT: mov x18, v1.d[1]
; CHECK-SD-NEXT: sdiv x10, x9, x8
; CHECK-SD-NEXT: sdiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: sdiv x16, x15, x14
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: fmov d0, x9
; CHECK-SD-NEXT: mov v0.d[1], x8
; CHECK-SD-NEXT: sdiv x0, x18, x17
; CHECK-SD-NEXT: msub x10, x16, x14, x15
; CHECK-SD-NEXT: fmov d1, x10
; CHECK-SD-NEXT: msub x11, x0, x17, x18
; CHECK-SD-NEXT: mov v1.d[1], x11
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d2
; CHECK-GI-NEXT: mov x10, v2.d[1]
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x12, d1
; CHECK-GI-NEXT: fmov x13, d3
; CHECK-GI-NEXT: mov x14, v3.d[1]
; CHECK-GI-NEXT: mov x15, v1.d[1]
; CHECK-GI-NEXT: sdiv x8, x8, x9
; CHECK-GI-NEXT: sdiv x12, x12, x13
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: sdiv x11, x11, x10
; CHECK-GI-NEXT: mov v3.d[0], x12
; CHECK-GI-NEXT: sdiv x15, x15, x14
; CHECK-GI-NEXT: mov v2.d[1], x11
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: mov x11, v2.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x10, x11, x10
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: mov v3.d[1], x15
; CHECK-GI-NEXT: mov v2.d[1], x10
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: mov x12, v3.d[1]
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: mul x9, x9, x13
; CHECK-GI-NEXT: mul x11, x12, x14
; CHECK-GI-NEXT: mov v3.d[0], x9
; CHECK-GI-NEXT: mov v3.d[1], x11
; CHECK-GI-NEXT: sub v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: ret
entry:
%s = srem <4 x i64> %d, %e
ret <4 x i64> %s
}
; urem on <2 x i64>.
; Default llc run (SD prefix): each lane uses scalar udiv + msub on x
; registers.
; -global-isel run (GI prefix): the expected code uses no mls here; the
; quotients come from per-lane udiv, the products from per-lane scalar mul,
; and the remainder from a vector sub on .2d lanes.
define <2 x i64> @uv2i64(<2 x i64> %d, <2 x i64> %e) {
; CHECK-SD-LABEL: uv2i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov x8, d1
; CHECK-SD-NEXT: fmov x9, d0
; CHECK-SD-NEXT: mov x11, v1.d[1]
; CHECK-SD-NEXT: mov x12, v0.d[1]
; CHECK-SD-NEXT: udiv x10, x9, x8
; CHECK-SD-NEXT: udiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: mov v0.d[1], x9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: mov x10, v1.d[1]
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: udiv x8, x8, x9
; CHECK-GI-NEXT: udiv x11, x11, x10
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: mov v1.d[1], x11
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov x11, v1.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x9, x11, x10
; CHECK-GI-NEXT: mov v1.d[0], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: ret
entry:
%s = urem <2 x i64> %d, %e
ret <2 x i64> %s
}
; urem on the non-power-of-two vector <3 x i64>; the expected code reads the
; operands from d0-d2 (dividend) and d3-d5 (divisor) and writes the result
; back to d0-d2.
; Default llc run (SD prefix): three independent scalar udiv + msub.
; -global-isel run (GI prefix): lanes 0 and 1 are packed into q registers,
; divided per lane with udiv, multiplied per lane with scalar mul and
; subtracted with a vector sub on .2d lanes; lane 2 uses scalar udiv + msub.
define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
; CHECK-SD-LABEL: uv3i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fmov x8, d3
; CHECK-SD-NEXT: fmov x9, d0
; CHECK-SD-NEXT: fmov x11, d4
; CHECK-SD-NEXT: fmov x12, d1
; CHECK-SD-NEXT: fmov x14, d5
; CHECK-SD-NEXT: fmov x15, d2
; CHECK-SD-NEXT: udiv x10, x9, x8
; CHECK-SD-NEXT: udiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: udiv x16, x15, x14
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: fmov d1, x9
; CHECK-SD-NEXT: msub x10, x16, x14, x15
; CHECK-SD-NEXT: fmov d2, x10
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: fmov x10, d4
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: udiv x8, x8, x9
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x11, d3
; CHECK-GI-NEXT: mov x14, v3.d[1]
; CHECK-GI-NEXT: udiv x9, x9, x10
; CHECK-GI-NEXT: mov v6.d[0], x8
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: mov v6.d[1], x9
; CHECK-GI-NEXT: fmov x9, d5
; CHECK-GI-NEXT: udiv x12, x8, x9
; CHECK-GI-NEXT: fmov x10, d6
; CHECK-GI-NEXT: mov x13, v6.d[1]
; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x11, x13, x14
; CHECK-GI-NEXT: mov v2.d[0], x10
; CHECK-GI-NEXT: mov v2.d[1], x11
; CHECK-GI-NEXT: msub x8, x12, x9, x8
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: fmov d2, x8
; CHECK-GI-NEXT: ret
entry:
%s = urem <3 x i64> %d, %e
ret <3 x i64> %s
}
; Unsigned remainder of <4 x i64>. The expected code takes the dividend from
; v0/v1 and the divisor from v2/v3 and returns the result in v0/v1. Both
; selectors issue four scalar udiv instructions. SelectionDAG pairs each with
; an msub and reinserts the lanes; GlobalISel rebuilds two quotient vectors,
; multiplies them lane by lane, and ends with two vector sub instructions.
define <4 x i64> @uv4i64(<4 x i64> %d, <4 x i64> %e) {
; CHECK-SD-LABEL: uv4i64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov x8, v2.d[1]
; CHECK-SD-NEXT: mov x9, v0.d[1]
; CHECK-SD-NEXT: fmov x11, d2
; CHECK-SD-NEXT: fmov x12, d0
; CHECK-SD-NEXT: fmov x14, d3
; CHECK-SD-NEXT: fmov x15, d1
; CHECK-SD-NEXT: mov x17, v3.d[1]
; CHECK-SD-NEXT: mov x18, v1.d[1]
; CHECK-SD-NEXT: udiv x10, x9, x8
; CHECK-SD-NEXT: udiv x13, x12, x11
; CHECK-SD-NEXT: msub x8, x10, x8, x9
; CHECK-SD-NEXT: udiv x16, x15, x14
; CHECK-SD-NEXT: msub x9, x13, x11, x12
; CHECK-SD-NEXT: fmov d0, x9
; CHECK-SD-NEXT: mov v0.d[1], x8
; CHECK-SD-NEXT: udiv x0, x18, x17
; CHECK-SD-NEXT: msub x10, x16, x14, x15
; CHECK-SD-NEXT: fmov d1, x10
; CHECK-SD-NEXT: msub x11, x0, x17, x18
; CHECK-SD-NEXT: mov v1.d[1], x11
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d2
; CHECK-GI-NEXT: mov x10, v2.d[1]
; CHECK-GI-NEXT: mov x11, v0.d[1]
; CHECK-GI-NEXT: fmov x12, d1
; CHECK-GI-NEXT: fmov x13, d3
; CHECK-GI-NEXT: mov x14, v3.d[1]
; CHECK-GI-NEXT: mov x15, v1.d[1]
; CHECK-GI-NEXT: udiv x8, x8, x9
; CHECK-GI-NEXT: udiv x12, x12, x13
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: udiv x11, x11, x10
; CHECK-GI-NEXT: mov v3.d[0], x12
; CHECK-GI-NEXT: udiv x15, x15, x14
; CHECK-GI-NEXT: mov v2.d[1], x11
; CHECK-GI-NEXT: fmov x8, d2
; CHECK-GI-NEXT: mov x11, v2.d[1]
; CHECK-GI-NEXT: mul x8, x8, x9
; CHECK-GI-NEXT: mul x10, x11, x10
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: mov v3.d[1], x15
; CHECK-GI-NEXT: mov v2.d[1], x10
; CHECK-GI-NEXT: fmov x9, d3
; CHECK-GI-NEXT: mov x12, v3.d[1]
; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: mul x9, x9, x13
; CHECK-GI-NEXT: mul x11, x12, x14
; CHECK-GI-NEXT: mov v3.d[0], x9
; CHECK-GI-NEXT: mov v3.d[1], x11
; CHECK-GI-NEXT: sub v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: ret
entry:
%s = urem <4 x i64> %d, %e
ret <4 x i64> %s
}
; Signed remainder of <2 x i128>. Both selectors are expected to emit one
; __modti3 call per element. For the first call x0/x1 are left untouched and
; x4/x5 are moved into x2/x3; the operands of the second element (x2/x3 and
; x6/x7) are parked in callee-saved registers across that call. The first
; result is kept in x23/x24 and returned in x0/x1, the second in x2/x3.
define <2 x i128> @sv2i128(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: sv2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w30, -64
; CHECK-SD-NEXT: mov x21, x3
; CHECK-SD-NEXT: mov x22, x2
; CHECK-SD-NEXT: mov x2, x4
; CHECK-SD-NEXT: mov x3, x5
; CHECK-SD-NEXT: mov x19, x7
; CHECK-SD-NEXT: mov x20, x6
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x23, x0
; CHECK-SD-NEXT: mov x24, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x20
; CHECK-SD-NEXT: mov x3, x19
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x2, x0
; CHECK-SD-NEXT: mov x3, x1
; CHECK-SD-NEXT: mov x0, x23
; CHECK-SD-NEXT: mov x1, x24
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w30, -64
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x2, x4
; CHECK-GI-NEXT: mov x3, x5
; CHECK-GI-NEXT: mov x21, x6
; CHECK-GI-NEXT: mov x22, x7
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x23, x0
; CHECK-GI-NEXT: mov x24, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x21
; CHECK-GI-NEXT: mov x3, x22
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x2, x0
; CHECK-GI-NEXT: mov x3, x1
; CHECK-GI-NEXT: mov x0, x23
; CHECK-GI-NEXT: mov x1, x24
; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = srem <2 x i128> %d, %e
ret <2 x i128> %s
}
; Signed remainder of <3 x i128>. Both selectors are expected to emit three
; __modti3 calls, one per element. The first call keeps x0/x1 and takes its
; divisor from x6/x7; the divisors for the other two elements are loaded from
; just above the 96-byte frame (sp+96 and sp+112). Intermediate results live
; in callee-saved registers and the final values are returned in x0-x5.
define <3 x i128> @sv3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: sv3i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -96
; CHECK-SD-NEXT: ldp x23, x24, [sp, #112]
; CHECK-SD-NEXT: mov x21, x3
; CHECK-SD-NEXT: ldp x25, x26, [sp, #96]
; CHECK-SD-NEXT: mov x22, x2
; CHECK-SD-NEXT: mov x2, x6
; CHECK-SD-NEXT: mov x3, x7
; CHECK-SD-NEXT: mov x19, x5
; CHECK-SD-NEXT: mov x20, x4
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x27, x0
; CHECK-SD-NEXT: mov x28, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x25
; CHECK-SD-NEXT: mov x3, x26
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x19
; CHECK-SD-NEXT: mov x2, x23
; CHECK-SD-NEXT: mov x3, x24
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x4, x0
; CHECK-SD-NEXT: mov x5, x1
; CHECK-SD-NEXT: mov x0, x27
; CHECK-SD-NEXT: mov x1, x28
; CHECK-SD-NEXT: mov x2, x21
; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv3i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -96
; CHECK-GI-NEXT: ldp x23, x24, [sp, #96]
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: ldp x25, x26, [sp, #112]
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x2, x6
; CHECK-GI-NEXT: mov x3, x7
; CHECK-GI-NEXT: mov x21, x4
; CHECK-GI-NEXT: mov x22, x5
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x27, x0
; CHECK-GI-NEXT: mov x28, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x23
; CHECK-GI-NEXT: mov x3, x24
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov x20, x1
; CHECK-GI-NEXT: mov x0, x21
; CHECK-GI-NEXT: mov x1, x22
; CHECK-GI-NEXT: mov x2, x25
; CHECK-GI-NEXT: mov x3, x26
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x4, x0
; CHECK-GI-NEXT: mov x5, x1
; CHECK-GI-NEXT: mov x0, x27
; CHECK-GI-NEXT: mov x1, x28
; CHECK-GI-NEXT: mov x2, x19
; CHECK-GI-NEXT: mov x3, x20
; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = srem <3 x i128> %d, %e
ret <3 x i128> %s
}
; Signed remainder of <4 x i128>. Both selectors are expected to emit four
; __modti3 calls, one per element. Every divisor element is loaded from just
; above the 128-byte frame (sp+128 through sp+191). Besides the callee-saved
; registers x19-x29, the expected code also uses local stack slots at sp+8 and
; sp+16 to hold values across calls. The four results are returned in x0-x7.
define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: sv4i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #128
; CHECK-SD-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 128
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: mov x23, x3
; CHECK-SD-NEXT: mov x24, x2
; CHECK-SD-NEXT: stp x6, x7, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp x8, x26, [sp, #176]
; CHECK-SD-NEXT: mov x21, x5
; CHECK-SD-NEXT: ldp x2, x3, [sp, #128]
; CHECK-SD-NEXT: mov x22, x4
; CHECK-SD-NEXT: ldp x27, x28, [sp, #160]
; CHECK-SD-NEXT: ldp x29, x19, [sp, #144]
; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x20, x0
; CHECK-SD-NEXT: mov x25, x1
; CHECK-SD-NEXT: mov x0, x24
; CHECK-SD-NEXT: mov x1, x23
; CHECK-SD-NEXT: mov x2, x29
; CHECK-SD-NEXT: mov x3, x19
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x23, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x27
; CHECK-SD-NEXT: mov x3, x28
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x0, x1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x3, x26
; CHECK-SD-NEXT: bl __modti3
; CHECK-SD-NEXT: mov x6, x0
; CHECK-SD-NEXT: mov x7, x1
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x25
; CHECK-SD-NEXT: mov x2, x19
; CHECK-SD-NEXT: mov x3, x23
; CHECK-SD-NEXT: mov x4, x21
; CHECK-SD-NEXT: mov x5, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #128
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #128
; CHECK-GI-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 128
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -88
; CHECK-GI-NEXT: .cfi_offset w29, -96
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x21, x4
; CHECK-GI-NEXT: ldp x2, x3, [sp, #128]
; CHECK-GI-NEXT: mov x22, x5
; CHECK-GI-NEXT: ldp x9, x8, [sp, #176]
; CHECK-GI-NEXT: mov x23, x7
; CHECK-GI-NEXT: ldp x24, x25, [sp, #144]
; CHECK-GI-NEXT: ldp x26, x27, [sp, #160]
; CHECK-GI-NEXT: stp x9, x6, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x28, x0
; CHECK-GI-NEXT: mov x29, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x24
; CHECK-GI-NEXT: mov x3, x25
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov x20, x1
; CHECK-GI-NEXT: mov x0, x21
; CHECK-GI-NEXT: mov x1, x22
; CHECK-GI-NEXT: mov x2, x26
; CHECK-GI-NEXT: mov x3, x27
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x21, x0
; CHECK-GI-NEXT: ldp x2, x0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov x22, x1
; CHECK-GI-NEXT: mov x1, x23
; CHECK-GI-NEXT: bl __modti3
; CHECK-GI-NEXT: mov x6, x0
; CHECK-GI-NEXT: mov x7, x1
; CHECK-GI-NEXT: mov x0, x28
; CHECK-GI-NEXT: mov x1, x29
; CHECK-GI-NEXT: mov x2, x19
; CHECK-GI-NEXT: mov x3, x20
; CHECK-GI-NEXT: mov x4, x21
; CHECK-GI-NEXT: mov x5, x22
; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #128
; CHECK-GI-NEXT: ret
entry:
%s = srem <4 x i128> %d, %e
ret <4 x i128> %s
}
; Unsigned remainder of <2 x i128>. Both selectors are expected to emit one
; __umodti3 call per element. For the first call x0/x1 are left untouched and
; x4/x5 are moved into x2/x3; the operands of the second element (x2/x3 and
; x6/x7) are parked in callee-saved registers across that call. The first
; result is kept in x23/x24 and returned in x0/x1, the second in x2/x3.
define <2 x i128> @uv2i128(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: uv2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w30, -64
; CHECK-SD-NEXT: mov x21, x3
; CHECK-SD-NEXT: mov x22, x2
; CHECK-SD-NEXT: mov x2, x4
; CHECK-SD-NEXT: mov x3, x5
; CHECK-SD-NEXT: mov x19, x7
; CHECK-SD-NEXT: mov x20, x6
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x23, x0
; CHECK-SD-NEXT: mov x24, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x20
; CHECK-SD-NEXT: mov x3, x19
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x2, x0
; CHECK-SD-NEXT: mov x3, x1
; CHECK-SD-NEXT: mov x0, x23
; CHECK-SD-NEXT: mov x1, x24
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w30, -64
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x2, x4
; CHECK-GI-NEXT: mov x3, x5
; CHECK-GI-NEXT: mov x21, x6
; CHECK-GI-NEXT: mov x22, x7
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x23, x0
; CHECK-GI-NEXT: mov x24, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x21
; CHECK-GI-NEXT: mov x3, x22
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x2, x0
; CHECK-GI-NEXT: mov x3, x1
; CHECK-GI-NEXT: mov x0, x23
; CHECK-GI-NEXT: mov x1, x24
; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = urem <2 x i128> %d, %e
ret <2 x i128> %s
}
; Unsigned remainder of <3 x i128>. Both selectors are expected to emit three
; __umodti3 calls, one per element. The first call keeps x0/x1 and takes its
; divisor from x6/x7; the divisors for the other two elements are loaded from
; just above the 96-byte frame (sp+96 and sp+112). Intermediate results live
; in callee-saved registers and the final values are returned in x0-x5.
define <3 x i128> @uv3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: uv3i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -96
; CHECK-SD-NEXT: ldp x23, x24, [sp, #112]
; CHECK-SD-NEXT: mov x21, x3
; CHECK-SD-NEXT: ldp x25, x26, [sp, #96]
; CHECK-SD-NEXT: mov x22, x2
; CHECK-SD-NEXT: mov x2, x6
; CHECK-SD-NEXT: mov x3, x7
; CHECK-SD-NEXT: mov x19, x5
; CHECK-SD-NEXT: mov x20, x4
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x27, x0
; CHECK-SD-NEXT: mov x28, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x25
; CHECK-SD-NEXT: mov x3, x26
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x19
; CHECK-SD-NEXT: mov x2, x23
; CHECK-SD-NEXT: mov x3, x24
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x4, x0
; CHECK-SD-NEXT: mov x5, x1
; CHECK-SD-NEXT: mov x0, x27
; CHECK-SD-NEXT: mov x1, x28
; CHECK-SD-NEXT: mov x2, x21
; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -96
; CHECK-GI-NEXT: ldp x23, x24, [sp, #96]
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: ldp x25, x26, [sp, #112]
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x2, x6
; CHECK-GI-NEXT: mov x3, x7
; CHECK-GI-NEXT: mov x21, x4
; CHECK-GI-NEXT: mov x22, x5
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x27, x0
; CHECK-GI-NEXT: mov x28, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x23
; CHECK-GI-NEXT: mov x3, x24
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov x20, x1
; CHECK-GI-NEXT: mov x0, x21
; CHECK-GI-NEXT: mov x1, x22
; CHECK-GI-NEXT: mov x2, x25
; CHECK-GI-NEXT: mov x3, x26
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x4, x0
; CHECK-GI-NEXT: mov x5, x1
; CHECK-GI-NEXT: mov x0, x27
; CHECK-GI-NEXT: mov x1, x28
; CHECK-GI-NEXT: mov x2, x19
; CHECK-GI-NEXT: mov x3, x20
; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
%s = urem <3 x i128> %d, %e
ret <3 x i128> %s
}
; Unsigned remainder of <4 x i128>. Both selectors are expected to emit four
; __umodti3 calls, one per element. Every divisor element is loaded from just
; above the 128-byte frame (sp+128 through sp+191). Besides the callee-saved
; registers x19-x29, the expected code also uses local stack slots at sp+8 and
; sp+16 to hold values across calls. The four results are returned in x0-x7.
define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: uv4i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #128
; CHECK-SD-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 128
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: mov x23, x3
; CHECK-SD-NEXT: mov x24, x2
; CHECK-SD-NEXT: stp x6, x7, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp x8, x26, [sp, #176]
; CHECK-SD-NEXT: mov x21, x5
; CHECK-SD-NEXT: ldp x2, x3, [sp, #128]
; CHECK-SD-NEXT: mov x22, x4
; CHECK-SD-NEXT: ldp x27, x28, [sp, #160]
; CHECK-SD-NEXT: ldp x29, x19, [sp, #144]
; CHECK-SD-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x20, x0
; CHECK-SD-NEXT: mov x25, x1
; CHECK-SD-NEXT: mov x0, x24
; CHECK-SD-NEXT: mov x1, x23
; CHECK-SD-NEXT: mov x2, x29
; CHECK-SD-NEXT: mov x3, x19
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x23, x1
; CHECK-SD-NEXT: mov x0, x22
; CHECK-SD-NEXT: mov x1, x21
; CHECK-SD-NEXT: mov x2, x27
; CHECK-SD-NEXT: mov x3, x28
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x0, x1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x3, x26
; CHECK-SD-NEXT: bl __umodti3
; CHECK-SD-NEXT: mov x6, x0
; CHECK-SD-NEXT: mov x7, x1
; CHECK-SD-NEXT: mov x0, x20
; CHECK-SD-NEXT: mov x1, x25
; CHECK-SD-NEXT: mov x2, x19
; CHECK-SD-NEXT: mov x3, x23
; CHECK-SD-NEXT: mov x4, x21
; CHECK-SD-NEXT: mov x5, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #128
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #128
; CHECK-GI-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 128
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w25, -56
; CHECK-GI-NEXT: .cfi_offset w26, -64
; CHECK-GI-NEXT: .cfi_offset w27, -72
; CHECK-GI-NEXT: .cfi_offset w28, -80
; CHECK-GI-NEXT: .cfi_offset w30, -88
; CHECK-GI-NEXT: .cfi_offset w29, -96
; CHECK-GI-NEXT: mov x19, x2
; CHECK-GI-NEXT: mov x20, x3
; CHECK-GI-NEXT: mov x21, x4
; CHECK-GI-NEXT: ldp x2, x3, [sp, #128]
; CHECK-GI-NEXT: mov x22, x5
; CHECK-GI-NEXT: ldp x9, x8, [sp, #176]
; CHECK-GI-NEXT: mov x23, x7
; CHECK-GI-NEXT: ldp x24, x25, [sp, #144]
; CHECK-GI-NEXT: ldp x26, x27, [sp, #160]
; CHECK-GI-NEXT: stp x9, x6, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x28, x0
; CHECK-GI-NEXT: mov x29, x1
; CHECK-GI-NEXT: mov x0, x19
; CHECK-GI-NEXT: mov x1, x20
; CHECK-GI-NEXT: mov x2, x24
; CHECK-GI-NEXT: mov x3, x25
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: mov x20, x1
; CHECK-GI-NEXT: mov x0, x21
; CHECK-GI-NEXT: mov x1, x22
; CHECK-GI-NEXT: mov x2, x26
; CHECK-GI-NEXT: mov x3, x27
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x21, x0
; CHECK-GI-NEXT: ldp x2, x0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x3, [sp, #8] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov x22, x1
; CHECK-GI-NEXT: mov x1, x23
; CHECK-GI-NEXT: bl __umodti3
; CHECK-GI-NEXT: mov x6, x0
; CHECK-GI-NEXT: mov x7, x1
; CHECK-GI-NEXT: mov x0, x28
; CHECK-GI-NEXT: mov x1, x29
; CHECK-GI-NEXT: mov x2, x19
; CHECK-GI-NEXT: mov x3, x20
; CHECK-GI-NEXT: mov x4, x21
; CHECK-GI-NEXT: mov x5, x22
; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #128
; CHECK-GI-NEXT: ret
entry:
%s = urem <4 x i128> %d, %e
ret <4 x i128> %s
}