; llvm/test/CodeGen/AArch64/arm64-fp128.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI:       warning: Instruction selection used fallback path for test_neg_sub

define fp128 @test_add(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __addtf3
; AArch64 has no fp128 FPU ops: fadd lowers to a tail call to compiler-rt's __addtf3.
  %val = fadd fp128 %lhs, %rhs
  ret fp128 %val
}

define fp128 @test_sub(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __subtf3
; fp128 fsub lowers to a tail call to the __subtf3 libcall.
  %val = fsub fp128 %lhs, %rhs
  ret fp128 %val
}

define fp128 @test_mul(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __multf3
; fp128 fmul lowers to a tail call to the __multf3 libcall.
  %val = fmul fp128 %lhs, %rhs
  ret fp128 %val
}

define fp128 @test_div(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_div:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __divtf3
; fp128 fdiv lowers to a tail call to the __divtf3 libcall.
  %val = fdiv fp128 %lhs, %rhs
  ret fp128 %val
}

define i32 @test_fptosi_32(fp128 %val) {
; CHECK-SD-LABEL: test_fptosi_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __fixtfsi
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_fptosi_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __fixtfsi
; fptosi fp128->i32 uses __fixtfsi; SDAG emits a plain call (spilling lr),
; GlobalISel tail-calls it.
  %val32 = fptosi fp128 %val to i32
  ret i32 %val32
}

define i64 @test_fptosi_64(fp128 %val) {
; CHECK-SD-LABEL: test_fptosi_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __fixtfdi
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_fptosi_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __fixtfdi
; fptosi fp128->i64 uses __fixtfdi; SDAG calls, GlobalISel tail-calls.
  %val64 = fptosi fp128 %val to i64
  ret i64 %val64
}

define i32 @test_fptoui_32(fp128 %val) {
; CHECK-SD-LABEL: test_fptoui_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __fixunstfsi
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_fptoui_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __fixunstfsi
; fptoui fp128->i32 uses __fixunstfsi; SDAG calls, GlobalISel tail-calls.
  %val32 = fptoui fp128 %val to i32
  ret i32 %val32
}

define i64 @test_fptoui_64(fp128 %val) {
; CHECK-SD-LABEL: test_fptoui_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __fixunstfdi
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_fptoui_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __fixunstfdi
; fptoui fp128->i64 uses __fixunstfdi; SDAG calls, GlobalISel tail-calls.
  %val64 = fptoui fp128 %val to i64
  ret i64 %val64
}

define fp128 @test_sitofp_32(i32 %src32) {
; CHECK-SD-LABEL: test_sitofp_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __floatsitf
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_sitofp_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __floatsitf
; sitofp i32->fp128 uses __floatsitf; SDAG calls, GlobalISel tail-calls.
  %val32 = sitofp i32 %src32 to fp128
  ret fp128 %val32
}

define fp128 @test_sitofp_64(i64 %src64) {
; CHECK-SD-LABEL: test_sitofp_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __floatditf
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_sitofp_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __floatditf
; sitofp i64->fp128 uses __floatditf; SDAG calls, GlobalISel tail-calls.
  %val64 = sitofp i64 %src64 to fp128
  ret fp128 %val64
}

define fp128 @test_uitofp_32(i32 %src32) {
; CHECK-SD-LABEL: test_uitofp_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __floatunsitf
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_uitofp_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __floatunsitf
; uitofp i32->fp128 uses __floatunsitf; SDAG calls, GlobalISel tail-calls.
  %val32 = uitofp i32 %src32 to fp128
  ret fp128 %val32
}

define fp128 @test_uitofp_64(i64 %src64) {
; CHECK-SD-LABEL: test_uitofp_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __floatunditf
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_uitofp_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __floatunditf
; uitofp i64->fp128 uses __floatunditf; SDAG calls, GlobalISel tail-calls.
  %val64 = uitofp i64 %src64 to fp128
  ret fp128 %val64
}

; Technically, everything after the call to __letf2 is redundant, but we'll let
; LLVM have its fun for now.
define i1 @test_setcc1(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_setcc1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __letf2
; CHECK-NEXT:    cmp w0, #0
; CHECK-NEXT:    cset w0, le
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
; fcmp ole on fp128 lowers to __letf2 and tests its integer result for <= 0.
  %val = fcmp ole fp128 %lhs, %rhs
  ret i1 %val
}

define i1 @test_setcc2(fp128 %lhs, fp128 %rhs) {
; CHECK-LABEL: test_setcc2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __letf2
; CHECK-NEXT:    cmp w0, #0
; CHECK-NEXT:    cset w0, gt
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
; fcmp ugt (the inverse of ole) also lowers to __letf2, with the condition
; flipped to > 0.
  %val = fcmp ugt fp128 %lhs, %rhs
  ret i1 %val
}

define i1 @test_setcc3(fp128 %lhs, fp128 %rhs) {
; CHECK-SD-LABEL: test_setcc3:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT:    bl __eqtf2
; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT:    mov w19, w0
; CHECK-SD-NEXT:    bl __unordtf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ccmp w19, #0, #4, eq
; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    cset w0, eq
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_setcc3:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #48
; CHECK-GI-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT:    bl __eqtf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w19, eq
; CHECK-GI-NEXT:    bl __unordtf2
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w8, ne
; CHECK-GI-NEXT:    orr w0, w19, w8
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #48
; CHECK-GI-NEXT:    ret
; ueq (equal-or-unordered) needs two libcalls: __eqtf2 and __unordtf2.
; SDAG fuses the results with ccmp/cset; GlobalISel combines them with orr.
  %val = fcmp ueq fp128 %lhs, %rhs
  ret i1 %val
}

; olt == !uge, which is what LLVM optimizes this to.
define i32 @test_br_cc(fp128 %lhs, fp128 %rhs) {
; CHECK-SD-LABEL: test_br_cc:
; CHECK-SD:       // %bb.0: // %common.ret
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __lttf2
; CHECK-SD-NEXT:    mov w8, #29 // =0x1d
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    mov w9, #42 // =0x2a
; CHECK-SD-NEXT:    csel w0, w9, w8, lt
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_br_cc:
; CHECK-GI:       // %bb.0: // %common.ret
; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    bl __lttf2
; CHECK-GI-NEXT:    mov w8, #29 // =0x1d
; CHECK-GI-NEXT:    mov w9, #42 // =0x2a
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    csel w0, w9, w8, lt
; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
; The compare+branch+two-returns collapses to a single __lttf2 call plus a
; branchless csel between 42 and 29 under both selectors.
  %cond = fcmp olt fp128 %lhs, %rhs
  br i1 %cond, label %iftrue, label %iffalse

iftrue:
  ret i32 42
iffalse:
  ret i32 29
}

define fp128 @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
; CHECK-SD-LABEL: test_select:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    tst w0, #0x1
; CHECK-SD-NEXT:    b.eq .LBB16_2
; CHECK-SD-NEXT:  // %bb.1:
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:  .LBB16_2:
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_select:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    and w8, w0, #0x1
; CHECK-GI-NEXT:    mov d2, v0.d[1]
; CHECK-GI-NEXT:    mov d3, v1.d[1]
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    fcsel d0, d0, d1, ne
; CHECK-GI-NEXT:    fcsel d1, d2, d3, ne
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    mov v0.d[1], x8
; CHECK-GI-NEXT:    ret
; select on fp128 needs no libcall: SDAG branches around a q-register copy,
; while GlobalISel uses a pair of fcsel ops on the 64-bit halves.
  %val = select i1 %cond, fp128 %lhs, fp128 %rhs
  ret fp128 %val
}

define half @test_round_f16(fp128 %val) {
; CHECK-SD-LABEL: test_round_f16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __trunctfhf2
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_round_f16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __trunctfhf2
; fptrunc fp128->half uses __trunctfhf2; SDAG calls, GlobalISel tail-calls.
  %dst = fptrunc fp128 %val to half
  ret half %dst
}

define float @test_round_f32(fp128 %val) {
; CHECK-SD-LABEL: test_round_f32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __trunctfsf2
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_round_f32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __trunctfsf2
; fptrunc fp128->float uses __trunctfsf2; SDAG calls, GlobalISel tail-calls.
  %dst = fptrunc fp128 %val to float
  ret float %dst
}

define double @test_round_f64(fp128 %val) {
; CHECK-SD-LABEL: test_round_f64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl __trunctfdf2
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_round_f64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    b __trunctfdf2
; fptrunc fp128->double uses __trunctfdf2; SDAG calls, GlobalISel tail-calls.
  %dst = fptrunc fp128 %val to double
  ret double %dst
}

define fp128 @test_extend_f16(half %val) {
; CHECK-LABEL: test_extend_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __extendhftf2
; fpext half->fp128 is a tail call to __extendhftf2 under both selectors.
  %dst = fpext half %val to fp128
  ret fp128 %dst
}

define fp128 @test_extend_f32(float %val) {
; CHECK-LABEL: test_extend_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __extendsftf2
; fpext float->fp128 is a tail call to __extendsftf2 under both selectors.
  %dst = fpext float %val to fp128
  ret fp128 %dst
}

define fp128 @test_extend_f64(double %val) {
; CHECK-LABEL: test_extend_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    b __extenddftf2
; fpext double->fp128 is a tail call to __extenddftf2 under both selectors.
  %dst = fpext double %val to fp128
  ret fp128 %dst
}

;; We convert this to fneg, and target-independent code expands it with
;; integer operations.
define fp128 @test_neg_sub(fp128 %in) {
; CHECK-LABEL: test_neg_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str q0, [sp, #-16]!
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrb w8, [sp, #15]
; CHECK-NEXT:    eor w8, w8, #0x80
; CHECK-NEXT:    strb w8, [sp, #15]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
; (-0.0 - x) is folded to fneg and expanded by XOR-ing the sign byte on the
; stack; GlobalISel falls back to SDAG for this function (see the fallback
; warning check at the top of the file), so a single CHECK block suffices.
  %ret = fsub fp128 0xL00000000000000008000000000000000, %in
  ret fp128 %ret
}

define fp128 @test_neg(fp128 %in) {
; CHECK-SD-LABEL: test_neg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str q0, [sp, #-16]!
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
; CHECK-SD-NEXT:    eor w8, w8, #0x80
; CHECK-SD-NEXT:    strb w8, [sp, #15]
; CHECK-SD-NEXT:    ldr q0, [sp], #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_neg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov x8, v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
; CHECK-GI-NEXT:    eor x8, x8, #0x8000000000000000
; CHECK-GI-NEXT:    mov v0.d[1], x8
; CHECK-GI-NEXT:    ret
; Direct fneg: SDAG flips the sign byte through the stack, GlobalISel flips
; bit 63 of the high lane entirely in registers.
  %ret = fneg fp128 %in
  ret fp128 %ret
}



define <2 x fp128> @vec_add(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-SD-LABEL: vec_add:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q1, q3, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    bl __addtf3
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT:    bl __addtf3
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_add:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __addtf3
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    bl __addtf3
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
; <2 x fp128> fadd is scalarized into two __addtf3 calls, spilling the
; not-yet-processed operands around each call.
  %val = fadd <2 x fp128> %lhs, %rhs
  ret <2 x fp128> %val
}

define <2 x fp128> @vec_sub(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-SD-LABEL: vec_sub:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q1, q3, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    bl __subtf3
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT:    bl __subtf3
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_sub:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __subtf3
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    bl __subtf3
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
; <2 x fp128> fsub is scalarized into two __subtf3 calls.
  %val = fsub <2 x fp128> %lhs, %rhs
  ret <2 x fp128> %val
}

define <2 x fp128> @vec_mul(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-SD-LABEL: vec_mul:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q1, q3, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    bl __multf3
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT:    bl __multf3
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_mul:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __multf3
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    bl __multf3
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
; <2 x fp128> fmul is scalarized into two __multf3 calls.
  %val = fmul <2 x fp128> %lhs, %rhs
  ret <2 x fp128> %val
}

define <2 x fp128> @vec_div(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-SD-LABEL: vec_div:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q1, q3, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    bl __divtf3
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT:    bl __divtf3
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_div:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __divtf3
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    bl __divtf3
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
; <2 x fp128> fdiv is scalarized into two __divtf3 calls.
  %val = fdiv <2 x fp128> %lhs, %rhs
  ret <2 x fp128> %val
}

define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
; CHECK-SD-LABEL: vec_fptosi_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __fixtfsi
; CHECK-SD-NEXT:    fmov s0, w0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __fixtfsi
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.s[1], w0
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_fptosi_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __fixtfsi
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov w19, w0
; CHECK-GI-NEXT:    bl __fixtfsi
; CHECK-GI-NEXT:    mov v0.s[0], w19
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.s[1], w0
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: two __fixtfsi calls, with the results inserted into the lanes
; of a <2 x i32>. SDAG stages the first result through a q spill; GlobalISel
; keeps it in the callee-saved w19.
  %val32 = fptosi <2 x fp128> %val to <2 x i32>
  ret <2 x i32> %val32
}

define <2 x i64> @vec_fptosi_64(<2 x fp128> %val) {
; CHECK-SD-LABEL: vec_fptosi_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    bl __fixtfdi
; CHECK-SD-NEXT:    fmov d0, x0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __fixtfdi
; CHECK-SD-NEXT:    fmov d0, x0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_fptosi_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __fixtfdi
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    bl __fixtfdi
; CHECK-GI-NEXT:    mov v0.d[0], x19
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.d[1], x0
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: two __fixtfdi calls, results packed into a <2 x i64>.
  %val64 = fptosi <2 x fp128> %val to <2 x i64>
  ret <2 x i64> %val64
}

define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
; CHECK-SD-LABEL: vec_fptoui_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __fixunstfsi
; CHECK-SD-NEXT:    fmov s0, w0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __fixunstfsi
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.s[1], w0
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_fptoui_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __fixunstfsi
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov w19, w0
; CHECK-GI-NEXT:    bl __fixunstfsi
; CHECK-GI-NEXT:    mov v0.s[0], w19
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.s[1], w0
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: two __fixunstfsi calls, results packed into a <2 x i32>.
  %val32 = fptoui <2 x fp128> %val to <2 x i32>
  ret <2 x i32> %val32
}

define <2 x i64> @vec_fptoui_64(<2 x fp128> %val) {
; CHECK-SD-LABEL: vec_fptoui_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    bl __fixunstfdi
; CHECK-SD-NEXT:    fmov d0, x0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __fixunstfdi
; CHECK-SD-NEXT:    fmov d0, x0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_fptoui_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __fixunstfdi
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    bl __fixunstfdi
; CHECK-GI-NEXT:    mov v0.d[0], x19
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.d[1], x0
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: two __fixunstfdi calls, results packed into a <2 x i64>.
  %val64 = fptoui <2 x fp128> %val to <2 x i64>
  ret <2 x i64> %val64
}

define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
; CHECK-SD-LABEL: vec_sitofp_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #32
; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __floatsitf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov w0, v1.s[1]
; CHECK-SD-NEXT:    bl __floatsitf
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_sitofp_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    bl __floatsitf
; CHECK-GI-NEXT:    fmov w0, s8
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __floatsitf
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: one __floatsitf call per source lane. GlobalISel keeps the
; second lane live across the first call in the callee-saved s8.
  %val32 = sitofp <2 x i32> %src32 to <2 x fp128>
  ret <2 x fp128> %val32
}

define <2 x fp128> @vec_sitofp_64(<2 x i64> %src64) {
; CHECK-SD-LABEL: vec_sitofp_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __floatditf
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov x0, v0.d[1]
; CHECK-SD-NEXT:    bl __floatditf
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_sitofp_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    mov d8, v0.d[1]
; CHECK-GI-NEXT:    bl __floatditf
; CHECK-GI-NEXT:    fmov x0, d8
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __floatditf
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: one __floatditf call per source lane.
  %val64 = sitofp <2 x i64> %src64 to <2 x fp128>
  ret <2 x fp128> %val64
}

define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
; CHECK-SD-LABEL: vec_uitofp_32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #32
; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __floatunsitf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov w0, v1.s[1]
; CHECK-SD-NEXT:    bl __floatunsitf
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_uitofp_32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    bl __floatunsitf
; CHECK-GI-NEXT:    fmov w0, s8
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __floatunsitf
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
; Scalarized: one __floatunsitf call per source lane.
  %val32 = uitofp <2 x i32> %src32 to <2 x fp128>
  ret <2 x fp128> %val32
}

define <2 x fp128> @vec_uitofp_64(<2 x i64> %src64) {
; Same scalarization as vec_uitofp_32 but for i64 lanes, so the libcall is
; __floatunditf (unsigned 64-bit -> fp128), called once per lane.
; CHECK-SD-LABEL: vec_uitofp_64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __floatunditf
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov x0, v0.d[1]
; CHECK-SD-NEXT:    bl __floatunditf
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_uitofp_64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    mov d8, v0.d[1]
; CHECK-GI-NEXT:    bl __floatunditf
; CHECK-GI-NEXT:    fmov x0, d8
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __floatunditf
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
  %val64 = uitofp <2 x i64> %src64 to <2 x fp128>
  ret <2 x fp128> %val64
}

define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; fcmp ole on fp128 lanes lowers to the soft-float comparison __letf2 per
; lane; a libcall result <= 0 (cset ... le) means "ordered less-or-equal".
; SD sign-extends each i1 into a d-reg lane and zips; GI inserts the bits
; with mov v0.s[n].
; CHECK-SD-LABEL: vec_setcc1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str d8, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -8
; CHECK-SD-NEXT:    .cfi_offset b8, -16
; CHECK-SD-NEXT:    stp q0, q2, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    mov v1.16b, v3.16b
; CHECK-SD-NEXT:    bl __letf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT:    cset w8, le
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d8, x8
; CHECK-SD-NEXT:    bl __letf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-SD-NEXT:    cset w8, le
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    zip1 v0.2s, v0.2s, v8.2s
; CHECK-SD-NEXT:    ldr d8, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_setcc1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #48
; CHECK-GI-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __letf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w19, le
; CHECK-GI-NEXT:    bl __letf2
; CHECK-GI-NEXT:    mov v0.s[0], w19
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w8, le
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #48
; CHECK-GI-NEXT:    ret
  %val = fcmp ole <2 x fp128> %lhs, %rhs
  ret <2 x i1> %val
}

define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; fcmp ugt is implemented as the inverse of ole: the same __letf2 libcall is
; used, but the condition tested is "gt" (result > 0), which is true exactly
; when NOT(ordered less-or-equal), i.e. unordered-or-greater-than.
; CHECK-SD-LABEL: vec_setcc2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str d8, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    str x30, [sp, #40] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -8
; CHECK-SD-NEXT:    .cfi_offset b8, -16
; CHECK-SD-NEXT:    stp q0, q2, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    mov v1.16b, v3.16b
; CHECK-SD-NEXT:    bl __letf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT:    cset w8, gt
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d8, x8
; CHECK-SD-NEXT:    bl __letf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-SD-NEXT:    cset w8, gt
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    zip1 v0.2s, v0.2s, v8.2s
; CHECK-SD-NEXT:    ldr d8, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_setcc2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #48
; CHECK-GI-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    stp q3, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __letf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w19, gt
; CHECK-GI-NEXT:    bl __letf2
; CHECK-GI-NEXT:    mov v0.s[0], w19
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w8, gt
; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #48
; CHECK-GI-NEXT:    ret
  %val = fcmp ugt <2 x fp128> %lhs, %rhs
  ret <2 x i1> %val
}

define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; fcmp ueq (unordered-or-equal) needs two libcalls per lane: __eqtf2 for the
; equality half and __unordtf2 for the NaN half. SD fuses them with ccmp/cset;
; GI computes each predicate with cset and combines them with orr.
; CHECK-SD-LABEL: vec_setcc3:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #96
; CHECK-SD-NEXT:    str d8, [sp, #64] // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    .cfi_offset b8, -32
; CHECK-SD-NEXT:    stp q1, q3, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT:    stp q0, q2, [sp, #32] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    mov v1.16b, v3.16b
; CHECK-SD-NEXT:    bl __eqtf2
; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT:    mov w19, w0
; CHECK-SD-NEXT:    bl __unordtf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT:    ccmp w19, #0, #4, eq
; CHECK-SD-NEXT:    cset w8, eq
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d8, x8
; CHECK-SD-NEXT:    bl __eqtf2
; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-SD-NEXT:    mov w19, w0
; CHECK-SD-NEXT:    bl __unordtf2
; CHECK-SD-NEXT:    cmp w0, #0
; CHECK-SD-NEXT:    ccmp w19, #0, #4, eq
; CHECK-SD-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT:    cset w8, eq
; CHECK-SD-NEXT:    sbfx x8, x8, #0, #1
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    zip1 v0.2s, v0.2s, v8.2s
; CHECK-SD-NEXT:    ldr d8, [sp, #64] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #96
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_setcc3:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #96
; CHECK-GI-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w30, -32
; CHECK-GI-NEXT:    stp q2, q0, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT:    stp q3, q1, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    bl __eqtf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w19, eq
; CHECK-GI-NEXT:    bl __unordtf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w8, ne
; CHECK-GI-NEXT:    orr w19, w19, w8
; CHECK-GI-NEXT:    bl __eqtf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    cset w20, eq
; CHECK-GI-NEXT:    bl __unordtf2
; CHECK-GI-NEXT:    mov v0.s[0], w19
; CHECK-GI-NEXT:    cmp w0, #0
; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-GI-NEXT:    cset w8, ne
; CHECK-GI-NEXT:    orr w8, w20, w8
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #96
; CHECK-GI-NEXT:    ret
  %val = fcmp ueq <2 x fp128> %lhs, %rhs
  ret <2 x i1> %val
}

define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs) {
; Per-lane select of fp128 values. SD lowers each lane to a branch that
; conditionally copies the whole q register; GI tests bit 0 of each condition
; lane and uses fcsel on the two 64-bit halves of each fp128, then rebuilds
; the q registers. No libcalls are needed.
; CHECK-SD-LABEL: vec_select:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmov w8, s0
; CHECK-SD-NEXT:    tst w8, #0x1
; CHECK-SD-NEXT:    b.eq .LBB40_2
; CHECK-SD-NEXT:  // %bb.1:
; CHECK-SD-NEXT:    mov v3.16b, v1.16b
; CHECK-SD-NEXT:  .LBB40_2:
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    tst w8, #0x1
; CHECK-SD-NEXT:    b.eq .LBB40_4
; CHECK-SD-NEXT:  // %bb.3:
; CHECK-SD-NEXT:    mov v4.16b, v2.16b
; CHECK-SD-NEXT:  .LBB40_4:
; CHECK-SD-NEXT:    mov v0.16b, v3.16b
; CHECK-SD-NEXT:    mov v1.16b, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_select:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, v0.s[1]
; CHECK-GI-NEXT:    fmov w9, s0
; CHECK-GI-NEXT:    mov d5, v1.d[1]
; CHECK-GI-NEXT:    mov d6, v3.d[1]
; CHECK-GI-NEXT:    and w9, w9, #0x1
; CHECK-GI-NEXT:    tst w9, #0x1
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    fcsel d0, d1, d3, ne
; CHECK-GI-NEXT:    fcsel d3, d5, d6, ne
; CHECK-GI-NEXT:    tst w8, #0x1
; CHECK-GI-NEXT:    mov d1, v2.d[1]
; CHECK-GI-NEXT:    mov d5, v4.d[1]
; CHECK-GI-NEXT:    fcsel d2, d2, d4, ne
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d2
; CHECK-GI-NEXT:    fcsel d2, d1, d5, ne
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    fmov x8, d3
; CHECK-GI-NEXT:    mov v1.d[0], x9
; CHECK-GI-NEXT:    fmov x9, d2
; CHECK-GI-NEXT:    mov v0.d[1], x8
; CHECK-GI-NEXT:    mov v1.d[1], x9
; CHECK-GI-NEXT:    ret
  %val = select <2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs
  ret <2 x fp128> %val
}

define <2 x half> @vec_round_f16(<2 x fp128> %val) {
; fptrunc <2 x fp128> -> <2 x half>, scalarized to __trunctfhf2 libcalls.
; NOTE(review): the GI output below calls __trunctfhf2 four times for two
; lanes and builds v2 from x8 with no visible prior def of x8 — this looks
; like a captured GlobalISel codegen deficiency, not a test-file error. If
; so, fix belongs in GlobalISel; regenerate these checks with
; update_llc_test_checks.py rather than hand-editing them.
; CHECK-SD-LABEL: vec_round_f16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    bl __trunctfhf2
; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __trunctfhf2
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_round_f16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    mov v2.d[0], x8
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    mov v2.d[1], x8
; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __trunctfhf2
; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __trunctfhf2
; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __trunctfhf2
; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __trunctfhf2
; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
  %dst = fptrunc <2 x fp128> %val to <2 x half>
  ret <2 x half> %dst
}

define <2 x float> @vec_round_f32(<2 x fp128> %val) {
; fptrunc <2 x fp128> -> <2 x float>: one __trunctfsf2 libcall per lane,
; results recombined with a lane insert (mov v.s[1]).
; CHECK-SD-LABEL: vec_round_f32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    bl __trunctfsf2
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __trunctfsf2
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_round_f32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #48
; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __trunctfsf2
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __trunctfsf2
; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
; CHECK-GI-NEXT:    fmov d0, d1
; CHECK-GI-NEXT:    add sp, sp, #48
; CHECK-GI-NEXT:    ret
  %dst = fptrunc <2 x fp128> %val to <2 x float>
  ret <2 x float> %dst
}

define <2 x double> @vec_round_f64(<2 x fp128> %val) {
; fptrunc <2 x fp128> -> <2 x double>: one __trunctfdf2 libcall per lane,
; results packed back with mov v.d[1].
; CHECK-SD-LABEL: vec_round_f64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    bl __trunctfdf2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __trunctfdf2
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_round_f64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #48
; CHECK-GI-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 48
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __trunctfdf2
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __trunctfdf2
; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #48
; CHECK-GI-NEXT:    ret
  %dst = fptrunc <2 x fp128> %val to <2 x double>
  ret <2 x double> %dst
}

define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
; fpext <2 x half> -> <2 x fp128>: one __extendhftf2 libcall per lane. GI
; keeps the second half lane live across the first call in callee-saved h8.
; CHECK-SD-LABEL: vec_extend_f16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #32
; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT:    bl __extendhftf2
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov h1, v1.h[1]
; CHECK-SD-NEXT:    fmov s0, s1
; CHECK-SD-NEXT:    bl __extendhftf2
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_extend_f16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov h8, v0.h[1]
; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECK-GI-NEXT:    bl __extendhftf2
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl __extendhftf2
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
  %dst = fpext <2 x half> %val to <2 x fp128>
  ret <2 x fp128> %dst
}

define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
; fpext <2 x float> -> <2 x fp128>: one __extendsftf2 libcall per lane,
; mirroring the f16 case above but on s-register lanes.
; CHECK-SD-LABEL: vec_extend_f32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #32
; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl __extendsftf2
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov s1, v1.s[1]
; CHECK-SD-NEXT:    fmov s0, s1
; CHECK-SD-NEXT:    bl __extendsftf2
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_extend_f32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    bl __extendsftf2
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl __extendsftf2
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
  %dst = fpext <2 x float> %val to <2 x fp128>
  ret <2 x fp128> %dst
}

define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
; fpext <2 x double> -> <2 x fp128>: one __extenddftf2 libcall per lane.
; SD spills the input vector; GI holds lane 1 in callee-saved d8 instead.
; CHECK-SD-LABEL: vec_extend_f64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    bl __extenddftf2
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov d0, v0.d[1]
; CHECK-SD-NEXT:    bl __extenddftf2
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_extend_f64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    mov d8, v0.d[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    bl __extenddftf2
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov d0, d8
; CHECK-GI-NEXT:    bl __extenddftf2
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
  %dst = fpext <2 x double> %val to <2 x fp128>
  ret <2 x fp128> %dst
}

define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
; Negation written as (0.0 - x): not folded to a sign-bit flip (contrast
; vec_neg below), so each lane becomes a __subtf3 libcall with a zero
; constant loaded from the constant pool (.LCPI47_0).
; CHECK-SD-LABEL: vec_neg_sub:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    adrp x8, .LCPI47_0
; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI47_0]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl __subtf3
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    bl __subtf3
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_neg_sub:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    mov v2.16b, v0.16b
; CHECK-GI-NEXT:    adrp x8, .LCPI47_0
; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI47_0]
; CHECK-GI-NEXT:    mov v1.16b, v2.16b
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    bl __subtf3
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    bl __subtf3
; CHECK-GI-NEXT:    mov v1.16b, v0.16b
; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
  %ret = fsub <2 x fp128> zeroinitializer, %in
  ret <2 x fp128> %ret
}

define <2 x fp128> @vec_neg(<2 x fp128> %in) {
; A true fneg needs no libcall — only the top sign bit of each 128-bit lane
; flips. SD round-trips through the stack and XORs the high byte of each
; lane (offsets 15 and 31, little-endian); GI XORs bit 63 of each lane's
; upper 64-bit half in registers.
; CHECK-SD-LABEL: vec_neg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    ldrb w8, [sp, #15]
; CHECK-SD-NEXT:    eor w8, w8, #0x80
; CHECK-SD-NEXT:    strb w8, [sp, #15]
; CHECK-SD-NEXT:    ldrb w8, [sp, #31]
; CHECK-SD-NEXT:    eor w8, w8, #0x80
; CHECK-SD-NEXT:    strb w8, [sp, #31]
; CHECK-SD-NEXT:    ldp q0, q1, [sp], #32
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vec_neg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov x8, v0.d[1]
; CHECK-GI-NEXT:    mov x9, v1.d[1]
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
; CHECK-GI-NEXT:    eor x8, x8, #0x8000000000000000
; CHECK-GI-NEXT:    eor x9, x9, #0x8000000000000000
; CHECK-GI-NEXT:    mov v0.d[1], x8
; CHECK-GI-NEXT:    mov v1.d[1], x9
; CHECK-GI-NEXT:    ret
  %ret = fneg <2 x fp128> %in
  ret <2 x fp128> %ret
}