; llvm/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s

target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"

; f16 -> s8
; No widening
define void @f16s8_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s8_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##32768,#1)
; CHECK-NEXT:     r4 = #14
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r3)
; CHECK-NEXT:     r6 = #5
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r4)
; CHECK-NEXT:     v8.h = vasl(v1.h,r2)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r5:4 = combine(#11,##32767)
; CHECK-NEXT:     v7 = vxor(v7,v7)
; CHECK-NEXT:     v8.h = vsub(v8.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = #16
; CHECK-NEXT:     v5.h = vasl(v1.h,r6)
; CHECK-NEXT:     q1 = vcmp.gt(v7.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.h = vsplat(r3)
; CHECK-NEXT:     v27.h = vasr(v3.h,r5)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:     q0 = vcmp.gt(v7.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9.h = vsplat(r4)
; CHECK-NEXT:     v8.h = vasr(v8.h,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v26.h = vasl(v0.h,r6)
; CHECK-NEXT:     v0.h = vsub(v4.h,v27.h)
; CHECK-NEXT:     v4.h = vsub(v4.h,v8.h)
; CHECK-NEXT:     v28 = vmux(q0,v2,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vmin(v4.h,v6.h)
; CHECK-NEXT:     v1 = vor(v26,v2)
; CHECK-NEXT:     v0.h = vmin(v0.h,v6.h)
; CHECK-NEXT:     v2 = vmux(q1,v2,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q2 = vcmp.gt(v4.h,v7.h)
; CHECK-NEXT:     q3 = vcmp.gt(v0.h,v7.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vlsr(v5.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.h = vlsr(v1.h,v0.h)
; CHECK-NEXT:     v29.h = vsub(v7.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsub(v7.h,v1.h)
; CHECK-NEXT:     v5 = vmux(q0,v29,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q1,v30,v1)
; CHECK-NEXT:     v31 = vmux(q2,v5,v28)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q3,v1,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.b = vpack(v1.h,v31.h):sat
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: load <128 x half> (two full 128B HVX vectors of halves),
  ; signed-convert to <128 x i8> (one full vector of bytes), store the result.
  %v0 = load <128 x half>, ptr %a0, align 128
  %v1 = fptosi <128 x half> %v0 to <128 x i8>
  store <128 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result
define void @f16s8_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s8_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r3 = #32767
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r3)
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q2 = vcmp.gt(v3.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vlsr(v29.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.h = vsub(v1.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q2,v0,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.b = vpack(v0.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: <64 x i8> result is half a vector, so the store is widened
  ; (note the predicated "if (q3) vmem" store above); conversion is signed.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptosi <64 x half> %v0 to <64 x i8>
  store <64 x i8> %v1, ptr %a1, align 128
  ret void
}

; f16 -> s16
; No widening
define void @f16s16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r2 = #32767
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r2)
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vcmp.gt(v3.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vlsr(v29.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.h = vsub(v1.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q3,v0,v2)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: element counts match a full vector on both sides
  ; (64 halves in, 64 i16 out), so no widening; signed conversion.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptosi <64 x half> %v0 to <64 x i16>
  store <64 x i16> %v1, ptr %a1, align 128
  ret void
}

; Widen input and result
define void @f16s16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r3 = #32767
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r3)
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q1 = vcmp.gt(v3.h,v1.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vlsr(v29.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.h = vsub(v1.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v0,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: 32 elements is a half-vector of halves and of i16, so both
  ; input and result are widened (predicated store above); signed conversion.
  %v0 = load <32 x half>, ptr %a0, align 128
  %v1 = fptosi <32 x half> %v0 to <32 x i16>
  store <32 x i16> %v1, ptr %a1, align 128
  ret void
}

; f16 -> s32
; No widening
define void @f16s32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = #15360
; CHECK-NEXT:     r7 = #-4
; CHECK-NEXT:     r6 = #1
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vsplat(r2)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     r5 = #8
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7 = vsplat(r4)
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     v24 = vxor(v24,v24)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v25 = vsplat(r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0.qf32 = vmpy(v1.hf,v0.hf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.sf = v0.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.sf = v1.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     q0 = vcmp.gt(v24.w,v1.w)
; CHECK-NEXT:     q1 = vcmp.gt(v24.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vasl(v0.w,r6)
; CHECK-NEXT:     v28 = vmux(q0,v2,v25)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r7:6 = combine(#30,#24)
; CHECK-NEXT:     v4.w = vasl(v1.w,r6)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r7)
; CHECK-NEXT:     v5.w = vasl(v0.w,r5)
; CHECK-NEXT:     v4.w = vsub(v4.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vasr(v3.w,r6)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vasr(v4.w,r6)
; CHECK-NEXT:     v3.w = vsub(v6.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.w = vasl(v1.w,r5)
; CHECK-NEXT:     v4.w = vsub(v6.w,v4.w)
; CHECK-NEXT:     v3.w = vmin(v3.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8 = vor(v8,v2)
; CHECK-NEXT:     v4.w = vmin(v4.w,v7.w)
; CHECK-NEXT:     v2 = vmux(q1,v2,v25)
; CHECK-NEXT:     q3 = vcmp.gt(v3.w,v24.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v26.w = vlsr(v5.w,v3.w)
; CHECK-NEXT:     q2 = vcmp.gt(v4.w,v24.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v27.w = vlsr(v8.w,v4.w)
; CHECK-NEXT:     v29.w = vsub(v24.w,v26.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9.w = vsub(v24.w,v27.w)
; CHECK-NEXT:     v1 = vmux(q1,v29,v26)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vmux(q0,v9,v27)
; CHECK-NEXT:     v31 = vmux(q3,v1,v2)
; CHECK-NEXT:     vmem(r1+#0) = v31.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q2,v30,v28)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#1) = v0.new
; CHECK-NEXT:    }
  ; IR under test: one vector of 64 halves widens to two vectors of 64 i32
  ; (two stores above at r1+#0 and r1+#1); signed conversion, no widening.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptosi <64 x half> %v0 to <64 x i32>
  store <64 x i32> %v1, ptr %a1, align 128
  ret void
}

; Widen input
define void @f16s32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16s32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r4 = #15360
; CHECK-NEXT:     r7 = #-4
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.h = vsplat(r4)
; CHECK-NEXT:     r2 = ##-2147483648
; CHECK-NEXT:     r3 = #1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3 = vsplat(r2)
; CHECK-NEXT:     r5:4 = combine(#8,#30)
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r4)
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0.qf32 = vmpy(v0.hf,v1.hf)
; CHECK-NEXT:     v2 = vxor(v2,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v30 = vsplat(r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.sf = v0.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.sf = v1.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v2.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vasl(v0.w,r3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vasl(v0.w,r5)
; CHECK-NEXT:     v1.w = vsub(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v29 = vor(v6,v3)
; CHECK-NEXT:     v3 = vmux(q0,v3,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vasr(v1.w,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vsub(v4.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vmin(v1.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vcmp.gt(v1.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v29.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.w = vsub(v2.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q3,v0,v3)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: 32 halves is a half-vector (input widened); the <32 x i32>
  ; result fills exactly one vector, so a plain full store is emitted; signed.
  %v0 = load <32 x half>, ptr %a0, align 128
  %v1 = fptosi <32 x half> %v0 to <32 x i32>
  store <32 x i32> %v1, ptr %a1, align 128
  ret void
}

; f32 -> s8
; No widening
define void @f32s8_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s8_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r4 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#1,#8)
; CHECK-NEXT:     v5 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vsplat(r4)
; CHECK-NEXT:     r7 = #30
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:     v2 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v10 = vsplat(r7)
; CHECK-NEXT:     r5 = #32
; CHECK-NEXT:     v8.w = vasl(v4.w,r3)
; CHECK-NEXT:     v4.cur = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7.w = vasl(v5.w,r3)
; CHECK-NEXT:     v12 = vxor(v12,v12)
; CHECK-NEXT:     v8.w = vsub(v8.w,v1.w)
; CHECK-NEXT:     v0 = vmem(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v13 = vsplat(r5)
; CHECK-NEXT:     v11.w = vasl(v0.w,r3)
; CHECK-NEXT:     v7.w = vsub(v7.w,v1.w)
; CHECK-NEXT:     q0 = vcmp.gt(v12.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9.w = vasl(v2.w,r3)
; CHECK-NEXT:     q1 = vcmp.gt(v12.w,v4.w)
; CHECK-NEXT:     v11.w = vsub(v11.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     r7 = #64
; CHECK-NEXT:     v8.w = vasr(v8.w,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v22 = vsplat(r3)
; CHECK-NEXT:     v7.w = vasr(v7.w,r6)
; CHECK-NEXT:     v19.w = vsub(v9.w,v1.w)
; CHECK-NEXT:     v8.w = vsub(v10.w,v8.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v20.w = vasl(v4.w,r2)
; CHECK-NEXT:     v27 = vmux(q1,v1,v22)
; CHECK-NEXT:     v25 = vmux(q0,v1,v22)
; CHECK-NEXT:     v7.w = vsub(v10.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vasl(v5.w,r2)
; CHECK-NEXT:     v8.w = vmin(v8.w,v13.w)
; CHECK-NEXT:     v9 = vor(v20,v1)
; CHECK-NEXT:     v21.w = vmin(v7.w,v13.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.w = vasr(v19.w,r6)
; CHECK-NEXT:     q3 = vcmp.gt(v8.w,v12.w)
; CHECK-NEXT:     v6 = vor(v6,v1)
; CHECK-NEXT:     q2 = vcmp.gt(v21.w,v12.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v11.w = vasr(v11.w,r6)
; CHECK-NEXT:     v5.w = vsub(v10.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vasl(v2.w,r2)
; CHECK-NEXT:     v10.w = vsub(v10.w,v11.w)
; CHECK-NEXT:     v5.w = vmin(v5.w,v13.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v23.w = vasl(v0.w,r2)
; CHECK-NEXT:     v3 = vor(v3,v1)
; CHECK-NEXT:     v10.w = vmin(v10.w,v13.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.w = vlsr(v9.w,v8.w)
; CHECK-NEXT:     v4 = vor(v23,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vlsr(v6.w,v21.w)
; CHECK-NEXT:     v26.w = vsub(v12.w,v8.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vlsr(v3.w,v5.w)
; CHECK-NEXT:     v24.w = vsub(v12.w,v6.w)
; CHECK-NEXT:     v8 = vmux(q1,v26,v8)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v4.w,v10.w)
; CHECK-NEXT:     v6 = vmux(q0,v24,v6)
; CHECK-NEXT:     q0 = vcmp.gt(v12.w,v2.w)
; CHECK-NEXT:     v28.w = vsub(v12.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vmux(q3,v8,v27)
; CHECK-NEXT:     v29.w = vsub(v12.w,v4.w)
; CHECK-NEXT:     q3 = vcmp.gt(v12.w,v0.w)
; CHECK-NEXT:     v6 = vmux(q2,v6,v25)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vmux(q0,v1,v22)
; CHECK-NEXT:     v3 = vmux(q0,v28,v3)
; CHECK-NEXT:     q2 = vcmp.gt(v5.w,v12.w)
; CHECK-NEXT:     v4 = vmux(q3,v29,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vpack(v2.w,v6.w):sat
; CHECK-NEXT:     v1 = vmux(q3,v1,v22)
; CHECK-NEXT:     q3 = vcmp.gt(v10.w,v12.w)
; CHECK-NEXT:     v0 = vmux(q2,v3,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q3,v4,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.b = vpack(v3.h,v2.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.b = vpack(v3.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v0,v31,r7)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: four input vectors of f32 (loads at r0+#0..#3) narrow to a
  ; single vector of 128 i8 via staged vpack:sat; signed, no widening.
  %v0 = load <128 x float>, ptr %a0, align 128
  %v1 = fptosi <128 x float> %v0 to <128 x i8>
  store <128 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result #1
define void @f32s8_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s8_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##-2147483648,#8)
; CHECK-NEXT:     r4 = #1
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3 = vsplat(r3)
; CHECK-NEXT:     r5 = #30
; CHECK-NEXT:     v4.w = vasl(v0.w,r4)
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.w = vasl(v1.w,r4)
; CHECK-NEXT:     v4.w = vsub(v4.w,v3.w)
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r5)
; CHECK-NEXT:     v7 = vsplat(r4)
; CHECK-NEXT:     v2.w = vasl(v1.w,r2)
; CHECK-NEXT:     v5.w = vsub(v5.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vasr(v4.w,r6)
; CHECK-NEXT:     v26 = vxor(v26,v26)
; CHECK-NEXT:     v2 = vor(v2,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     v5.w = vasr(v5.w,r6)
; CHECK-NEXT:     q0 = vcmp.gt(v26.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v27 = vsplat(r3)
; CHECK-NEXT:     v4.w = vsub(v6.w,v4.w)
; CHECK-NEXT:     q2 = vcmp.gt(v26.w,v0.w)
; CHECK-NEXT:     v5.w = vsub(v6.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.w = vasl(v0.w,r2)
; CHECK-NEXT:     v4.w = vmin(v4.w,v7.w)
; CHECK-NEXT:     v30 = vmux(q0,v3,v27)
; CHECK-NEXT:     v5.w = vmin(v5.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v25 = vor(v8,v3)
; CHECK-NEXT:     v1 = vmux(q2,v3,v27)
; CHECK-NEXT:     q3 = vcmp.gt(v4.w,v26.w)
; CHECK-NEXT:     q1 = vcmp.gt(v5.w,v26.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v2.w = vlsr(v2.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v28.w = vlsr(v25.w,v4.w)
; CHECK-NEXT:     v29.w = vsub(v26.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vsub(v26.w,v28.w)
; CHECK-NEXT:     v0 = vmux(q0,v29,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q2,v6,v28)
; CHECK-NEXT:     v0 = vmux(q1,v0,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v1 = vmux(q3,v31,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.b = vpack(v2.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: two f32 vectors in, <64 x i8> out (half a vector), so the
  ; result store is widened/predicated; signed conversion.
  %v0 = load <64 x float>, ptr %a0, align 128
  %v1 = fptosi <64 x float> %v0 to <64 x i8>
  store <64 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result #2
define void @f32s8_2(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s8_2:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#30,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     r5:4 = combine(#8,#24)
; CHECK-NEXT:     r6 = #32
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r3)
; CHECK-NEXT:     v5 = vsplat(r6)
; CHECK-NEXT:     v6.w = vasl(v0.w,r5)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r6)
; CHECK-NEXT:     v3.w = vasr(v3.w,r4)
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r4 = ##2147483647
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r4)
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:     q2 = vcmp.gt(v3.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v29.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.w = vsub(v1.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q2,v0,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpack(v0.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.b = vpack(v1.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: one f32 vector in, <32 x i8> out (a quarter vector), so the
  ; result store is widened/predicated (vsetq on #32 lanes); signed conversion.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptosi <32 x float> %v0 to <32 x i8>
  store <32 x i8> %v1, ptr %a1, align 128
  ret void
}

; f32 -> s16
; No widening
define void @f32s16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##-2147483648,#1)
; CHECK-NEXT:     r4 = #30
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r3)
; CHECK-NEXT:     r6 = #8
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r4)
; CHECK-NEXT:     v8.w = vasl(v1.w,r2)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r5:4 = combine(#24,##2147483647)
; CHECK-NEXT:     v7 = vxor(v7,v7)
; CHECK-NEXT:     v8.w = vsub(v8.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = #32
; CHECK-NEXT:     v5.w = vasl(v1.w,r6)
; CHECK-NEXT:     q1 = vcmp.gt(v7.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r3)
; CHECK-NEXT:     v27.w = vasr(v3.w,r5)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:     q0 = vcmp.gt(v7.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9 = vsplat(r4)
; CHECK-NEXT:     v8.w = vasr(v8.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v26.w = vasl(v0.w,r6)
; CHECK-NEXT:     v0.w = vsub(v4.w,v27.w)
; CHECK-NEXT:     v4.w = vsub(v4.w,v8.w)
; CHECK-NEXT:     v28 = vmux(q0,v2,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vmin(v4.w,v6.w)
; CHECK-NEXT:     v1 = vor(v26,v2)
; CHECK-NEXT:     v0.w = vmin(v0.w,v6.w)
; CHECK-NEXT:     v2 = vmux(q1,v2,v9)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q2 = vcmp.gt(v4.w,v7.w)
; CHECK-NEXT:     q3 = vcmp.gt(v0.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.w = vlsr(v5.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vlsr(v1.w,v0.w)
; CHECK-NEXT:     v29.w = vsub(v7.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.w = vsub(v7.w,v1.w)
; CHECK-NEXT:     v5 = vmux(q0,v29,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q1,v30,v1)
; CHECK-NEXT:     v31 = vmux(q2,v5,v28)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q3,v1,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpack(v1.w,v31.w):sat
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: two f32 vectors in, one full <64 x i16> vector out via
  ; vpack:sat; signed conversion, no widening.
  %v0 = load <64 x float>, ptr %a0, align 128
  %v1 = fptosi <64 x float> %v0 to <64 x i16>
  store <64 x i16> %v1, ptr %a1, align 128
  ret void
}

; Widen result
define void @f32s16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r3)
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q2 = vcmp.gt(v3.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v29.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.w = vsub(v1.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q2,v0,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.h = vpack(v0.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: one f32 vector in, <32 x i16> out (half a vector), so the
  ; result store is widened/predicated; signed conversion.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptosi <32 x float> %v0 to <32 x i16>
  store <32 x i16> %v1, ptr %a1, align 128
  ret void
}

; f32 -> s32
; No widening
define void @f32s32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r2)
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vcmp.gt(v3.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v29.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.w = vsub(v1.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q3,v0,v2)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: same-width conversion (<32 x float> to <32 x i32>), one
  ; full vector in and out; signed, no widening.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptosi <32 x float> %v0 to <32 x i32>
  store <32 x i32> %v1, ptr %a1, align 128
  ret void
}

; Widen input and result
define void @f32s32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32s32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v29 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r3)
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vmux(q0,v2,v30)
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q1 = vcmp.gt(v3.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vlsr(v29.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.w = vsub(v1.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q0,v31,v4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v0,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: 16 elements is a half-vector of f32/i32, so both input and
  ; result are widened (predicated store above); signed conversion.
  %v0 = load <16 x float>, ptr %a0, align 128
  %v1 = fptosi <16 x float> %v0 to <16 x i32>
  store <16 x i32> %v1, ptr %a1, align 128
  ret void
}


; f16 -> u8
; No widening
define void @f16u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u8_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##32768,#1)
; CHECK-NEXT:     r4 = #14
; CHECK-NEXT:     v0 = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r3)
; CHECK-NEXT:     r7:6 = combine(#11,#16)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.h = vsplat(r4)
; CHECK-NEXT:     r5 = #5
; CHECK-NEXT:     v4.h = vasl(v1.h,r2)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7.h = vsplat(r6)
; CHECK-NEXT:     v5.h = vasl(v1.h,r5)
; CHECK-NEXT:     v4.h = vsub(v4.h,v2.h)
; CHECK-NEXT:     v28 = vxor(v28,v28)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = #32767
; CHECK-NEXT:     v3.h = vasr(v3.h,r7)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v29.h = vsplat(r2)
; CHECK-NEXT:     v4.h = vasr(v4.h,r7)
; CHECK-NEXT:     q2 = vcmp.gt(v28.h,v1.h)
; CHECK-NEXT:     v3.h = vsub(v6.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.h = vasl(v0.h,r5)
; CHECK-NEXT:     q3 = vcmp.gt(v28.h,v0.h)
; CHECK-NEXT:     v4.h = vsub(v6.h,v4.h)
; CHECK-NEXT:     v3.h = vmin(v3.h,v7.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vmin(v4.h,v7.h)
; CHECK-NEXT:     v2 = vor(v8,v2)
; CHECK-NEXT:     q1 = vcmp.gt(v28.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v28.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vlsr(v5.h,v4.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vlsr(v2.h,v3.h)
; CHECK-NEXT:     v30 = vmux(q0,v29,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q1,v29,v2)
; CHECK-NEXT:     v0 = vmux(q2,v28,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q3,v28,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.ub = vpack(v1.h,v0.h):sat
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: same shape as @f16s8_0 but UNSIGNED (fptoui); note the
  ; vpack to .ub above versus .b in the signed variant.
  %v0 = load <128 x half>, ptr %a0, align 128
  %v1 = fptoui <128 x half> %v0 to <128 x i8>
  store <128 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result
define void @f16u8_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u8_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r3 = #32767
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r3)
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q1 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vlsr(v2.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.ub = vpack(v0.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <64 x half> -> <64 x i8>; the i8 result is narrower
  ; than a full vector, so the store is predicated (widened result case).
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptoui <64 x half> %v0 to <64 x i8>
  store <64 x i8> %v1, ptr %a1, align 128
  ret void
}

; f16 -> u16
; No widening
define void @f16u16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r2 = #32767
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r2)
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q1 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vlsr(v2.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <64 x half> -> <64 x i16>, a same-width conversion
  ; (no widening), ending in an unconditional full-vector store.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptoui <64 x half> %v0 to <64 x i16>
  store <64 x i16> %v1, ptr %a1, align 128
  ret void
}

; Widen input and result
define void @f16u16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##32768
; CHECK-NEXT:     r3:2 = combine(#5,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vsplat(r7)
; CHECK-NEXT:     v3.h = vasl(v0.h,r2)
; CHECK-NEXT:     r6 = #14
; CHECK-NEXT:     r5 = #11
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.h = vsplat(r6)
; CHECK-NEXT:     r4 = #16
; CHECK-NEXT:     v6.h = vasl(v0.h,r3)
; CHECK-NEXT:     v3.h = vsub(v3.h,v2.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.h = vsplat(r4)
; CHECK-NEXT:     r3 = #32767
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.h = vsplat(r3)
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.h = vasr(v3.h,r5)
; CHECK-NEXT:     q1 = vcmp.gt(v1.h,v0.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v3.h = vsub(v4.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.h = vmin(v3.h,v5.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.h = vlsr(v2.h,v3.h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <32 x half> -> <32 x i16>; both input and result are
  ; narrower than a full vector (widened input and result), so the final store
  ; is predicated. CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <32 x half>, ptr %a0, align 128
  %v1 = fptoui <32 x half> %v0 to <32 x i16>
  store <32 x i16> %v1, ptr %a1, align 128
  ret void
}

; f16 -> u32
; No widening
define void @f16u32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = #15360
; CHECK-NEXT:     r7 = #-4
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.h = vsplat(r2)
; CHECK-NEXT:     r4 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#30,#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r4)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:     r0 = #8
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r3)
; CHECK-NEXT:     v26 = vxor(v26,v26)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0.qf32 = vmpy(v0.hf,v1.hf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7 = vsplat(r4)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.sf = v0.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.sf = v1.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q1 = vcmp.gt(v26.w,v1.w)
; CHECK-NEXT:     q3 = vcmp.gt(v26.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vasl(v1.w,r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     v4.w = vasl(v0.w,r2)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v27 = vsplat(r2)
; CHECK-NEXT:     v5.w = vasl(v1.w,r0)
; CHECK-NEXT:     v4.w = vsub(v4.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vasr(v3.w,r6)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vasr(v4.w,r6)
; CHECK-NEXT:     v3.w = vsub(v6.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.w = vasl(v0.w,r0)
; CHECK-NEXT:     v4.w = vsub(v6.w,v4.w)
; CHECK-NEXT:     v3.w = vmin(v3.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vmin(v4.w,v7.w)
; CHECK-NEXT:     v2 = vor(v8,v2)
; CHECK-NEXT:     q0 = vcmp.gt(v26.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vlsr(v5.w,v3.w)
; CHECK-NEXT:     q2 = vcmp.gt(v26.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v28.w = vlsr(v2.w,v4.w)
; CHECK-NEXT:     v29 = vmux(q0,v27,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vmux(q2,v27,v28)
; CHECK-NEXT:     v31 = vmux(q1,v26,v29)
; CHECK-NEXT:     vmem(r1+#1) = v31.new
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q3,v26,v30)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <64 x half> -> <64 x i32>; the i32 result is twice as
  ; wide as the half input, so the output is two full vector stores (no widening
  ; of the input). CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <64 x half>, ptr %a0, align 128
  %v1 = fptoui <64 x half> %v0 to <64 x i32>
  store <64 x i32> %v1, ptr %a1, align 128
  ret void
}

; Widen input
define void @f16u32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f16u32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r4 = #15360
; CHECK-NEXT:     r7 = #-4
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.h = vsplat(r4)
; CHECK-NEXT:     r2 = ##-2147483648
; CHECK-NEXT:     r3 = #1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3 = vsplat(r2)
; CHECK-NEXT:     r5:4 = combine(#8,#30)
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r4)
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0.qf32 = vmpy(v0.hf,v1.hf)
; CHECK-NEXT:     v2 = vxor(v2,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v30 = vsplat(r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.sf = v0.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.sf = v1.qf32
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v1,v0,r7)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q1 = vcmp.gt(v2.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vasl(v0.w,r3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vasl(v0.w,r5)
; CHECK-NEXT:     v1.w = vsub(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3 = vor(v6,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vasr(v1.w,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vsub(v4.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vmin(v1.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v2.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.w = vlsr(v3.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v2,v31)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <32 x half> -> <32 x i32>; the half input is narrower
  ; than a full vector (widened input), the i32 result fills one vector so the
  ; store is unconditional. CHECK lines autogenerated by update_llc_test_checks.py.
  %v0 = load <32 x half>, ptr %a0, align 128
  %v1 = fptoui <32 x half> %v0 to <32 x i32>
  store <32 x i32> %v1, ptr %a1, align 128
  ret void
}

; f32 -> u8
; No widening
define void @f32u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u8_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     r4 = ##-2147483648
; CHECK-NEXT:     v5 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3 = vsplat(r4)
; CHECK-NEXT:     r5 = #30
; CHECK-NEXT:     r6 = #24
; CHECK-NEXT:     v2 = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v14 = vsplat(r5)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v8.w = vasl(v5.w,r2)
; CHECK-NEXT:     v0 = vmem(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9.w = vasl(v2.w,r2)
; CHECK-NEXT:     v13 = vxor(v13,v13)
; CHECK-NEXT:     v8.w = vsub(v8.w,v3.w)
; CHECK-NEXT:     v1 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v20 = vsplat(r4)
; CHECK-NEXT:     v12.w = vasl(v0.w,r2)
; CHECK-NEXT:     v9.w = vsub(v9.w,v3.w)
; CHECK-NEXT:     q0 = vcmp.gt(v13.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v11.w = vasl(v1.w,r2)
; CHECK-NEXT:     q3 = vcmp.gt(v13.w,v2.w)
; CHECK-NEXT:     v12.w = vsub(v12.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     r7 = #64
; CHECK-NEXT:     v11.w = vsub(v11.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v22 = vsplat(r2)
; CHECK-NEXT:     v8.w = vasr(v8.w,r6)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v9.w = vasr(v9.w,r6)
; CHECK-NEXT:     v8.w = vsub(v14.w,v8.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vasl(v5.w,r3)
; CHECK-NEXT:     v9.w = vsub(v14.w,v9.w)
; CHECK-NEXT:     v8.w = vmin(v8.w,v20.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7.w = vasl(v2.w,r3)
; CHECK-NEXT:     v6 = vor(v6,v3)
; CHECK-NEXT:     v9.w = vmin(v9.w,v20.w)
; CHECK-NEXT:     q1 = vcmp.gt(v13.w,v8.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v19.w = vasr(v11.w,r6)
; CHECK-NEXT:     v7 = vor(v7,v3)
; CHECK-NEXT:     q2 = vcmp.gt(v13.w,v9.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v12.w = vasr(v12.w,r6)
; CHECK-NEXT:     v5.w = vsub(v14.w,v19.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vasl(v1.w,r3)
; CHECK-NEXT:     v21.w = vsub(v14.w,v12.w)
; CHECK-NEXT:     v5.w = vmin(v5.w,v20.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v10.w = vasl(v0.w,r3)
; CHECK-NEXT:     v4 = vor(v4,v3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6.w = vlsr(v6.w,v8.w)
; CHECK-NEXT:     v3 = vor(v10,v3)
; CHECK-NEXT:     v10.w = vmin(v21.w,v20.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7.w = vlsr(v7.w,v9.w)
; CHECK-NEXT:     v24 = vmux(q1,v22,v6)
; CHECK-NEXT:     q1 = vcmp.gt(v13.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v23.w = vlsr(v4.w,v5.w)
; CHECK-NEXT:     v25 = vmux(q2,v22,v7)
; CHECK-NEXT:     q2 = vcmp.gt(v13.w,v10.w)
; CHECK-NEXT:     v4 = vmux(q0,v13,v24)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vlsr(v3.w,v10.w)
; CHECK-NEXT:     v26 = vmux(q3,v13,v25)
; CHECK-NEXT:     v2 = vmux(q1,v22,v23)
; CHECK-NEXT:     q1 = vcmp.gt(v13.w,v1.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v27 = vmux(q2,v22,v3)
; CHECK-NEXT:     q3 = vcmp.gt(v13.w,v0.w)
; CHECK-NEXT:     v28 = vmux(q1,v13,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v29.uh = vpack(v26.w,v4.w):sat
; CHECK-NEXT:     v1 = vmux(q3,v13,v27)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30.uh = vpack(v1.w,v28.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uh = vpack(v1.w,v28.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.ub = vpack(v30.h,v29.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.ub = vpack(v30.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1:0 = vshuff(v0,v31,r7)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <128 x float> -> <128 x i8>; four input vectors are
  ; converted, packed (w -> uh -> ub) and shuffled into one i8 result vector.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <128 x float>, ptr %a0, align 128
  %v1 = fptoui <128 x float> %v0 to <128 x i8>
  store <128 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result #1
define void @f32u8_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u8_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##-2147483648,#1)
; CHECK-NEXT:     r4 = #30
; CHECK-NEXT:     v0 = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r3)
; CHECK-NEXT:     r7:6 = combine(#24,#32)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r4)
; CHECK-NEXT:     r5 = #8
; CHECK-NEXT:     v4.w = vasl(v1.w,r2)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7 = vsplat(r6)
; CHECK-NEXT:     v5.w = vasl(v1.w,r5)
; CHECK-NEXT:     v4.w = vsub(v4.w,v2.w)
; CHECK-NEXT:     v27 = vxor(v27,v27)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     v3.w = vasr(v3.w,r7)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v28 = vsplat(r3)
; CHECK-NEXT:     v4.w = vasr(v4.w,r7)
; CHECK-NEXT:     q2 = vcmp.gt(v27.w,v1.w)
; CHECK-NEXT:     v3.w = vsub(v6.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v8.w = vasl(v0.w,r5)
; CHECK-NEXT:     q3 = vcmp.gt(v27.w,v0.w)
; CHECK-NEXT:     v4.w = vsub(v6.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vmin(v3.w,v7.w)
; CHECK-NEXT:     v4.w = vmin(v4.w,v7.w)
; CHECK-NEXT:     v2 = vor(v8,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q1 = vcmp.gt(v27.w,v3.w)
; CHECK-NEXT:     q0 = vcmp.gt(v27.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.w = vlsr(v5.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:     v29 = vmux(q0,v28,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vmux(q1,v28,v2)
; CHECK-NEXT:     v0 = vmux(q2,v27,v29)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     v1 = vmux(q3,v27,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31.uh = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uh = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.ub = vpack(v31.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <64 x float> -> <64 x i8>; the 64-byte i8 result is
  ; half a vector wide (widened result), so the store is predicated on q3.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <64 x float>, ptr %a0, align 128
  %v1 = fptoui <64 x float> %v0 to <64 x i8>
  store <64 x i8> %v1, ptr %a1, align 128
  ret void
}

; Widen result #2
define void @f32u8_2(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u8_2:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#30,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     r5:4 = combine(#8,#24)
; CHECK-NEXT:     r6 = #32
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r3)
; CHECK-NEXT:     v5 = vsplat(r6)
; CHECK-NEXT:     v6.w = vasl(v0.w,r5)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q3 = vsetq(r6)
; CHECK-NEXT:     v3.w = vasr(v3.w,r4)
; CHECK-NEXT:     q1 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r4 = ##2147483647
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r4)
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.uh = vpack(v1.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uh = vpack(v0.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.ub = vpack(v1.h,v0.h):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <32 x float> -> <32 x i8>; the 32-byte i8 result is a
  ; quarter vector wide (widened result), stored under a q3 predicate.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptoui <32 x float> %v0 to <32 x i8>
  store <32 x i8> %v1, ptr %a1, align 128
  ret void
}

; f32 -> u16
; No widening
define void @f32u16_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u16_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r3:2 = combine(##-2147483648,#1)
; CHECK-NEXT:     r4 = #30
; CHECK-NEXT:     v0 = vmem(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r3)
; CHECK-NEXT:     r7:6 = combine(#24,#32)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     v1 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v6 = vsplat(r4)
; CHECK-NEXT:     r5 = #8
; CHECK-NEXT:     v4.w = vasl(v1.w,r2)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v7 = vsplat(r6)
; CHECK-NEXT:     v5.w = vasl(v1.w,r5)
; CHECK-NEXT:     v4.w = vsub(v4.w,v2.w)
; CHECK-NEXT:     v28 = vxor(v28,v28)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     v3.w = vasr(v3.w,r7)
; CHECK-NEXT:     v5 = vor(v5,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v29 = vsplat(r2)
; CHECK-NEXT:     v4.w = vasr(v4.w,r7)
; CHECK-NEXT:     q2 = vcmp.gt(v28.w,v1.w)
; CHECK-NEXT:     v3.w = vsub(v6.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v8.w = vasl(v0.w,r5)
; CHECK-NEXT:     q3 = vcmp.gt(v28.w,v0.w)
; CHECK-NEXT:     v4.w = vsub(v6.w,v4.w)
; CHECK-NEXT:     v3.w = vmin(v3.w,v7.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4.w = vmin(v4.w,v7.w)
; CHECK-NEXT:     v2 = vor(v8,v2)
; CHECK-NEXT:     q1 = vcmp.gt(v28.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v28.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5.w = vlsr(v5.w,v4.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:     v30 = vmux(q0,v29,v5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q1,v29,v2)
; CHECK-NEXT:     v0 = vmux(q2,v28,v30)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmux(q3,v28,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uh = vpack(v1.w,v0.w):sat
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <64 x float> -> <64 x i16>; two input vectors are
  ; converted and packed (w -> uh) into one full result vector (no widening).
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <64 x float>, ptr %a0, align 128
  %v1 = fptoui <64 x float> %v0 to <64 x i16>
  store <64 x i16> %v1, ptr %a1, align 128
  ret void
}

; Widen result
define void @f32u16_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u16_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r3)
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     q1 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0.uh = vpack(v0.w,v0.w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <32 x float> -> <32 x i16>; the i16 result is half a
  ; vector wide (widened result), so the final store is predicated on q3.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptoui <32 x float> %v0 to <32 x i16>
  store <32 x i16> %v1, ptr %a1, align 128
  ret void
}

; f32 -> u32
; No widening
define void @f32u32_0(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u32_0:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r2 = ##2147483647
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r2)
; CHECK-NEXT:     q1 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#0) = v0.new
; CHECK-NEXT:    }
  ; IR under test: fptoui <32 x float> -> <32 x i32>, a same-width conversion
  ; (no widening) with an unconditional full-vector store.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <32 x float>, ptr %a0, align 128
  %v1 = fptoui <32 x float> %v0 to <32 x i32>
  store <32 x i32> %v1, ptr %a1, align 128
  ret void
}

; Widen input and result
define void @f32u32_1(ptr %a0, ptr %a1) #0 {
; CHECK-LABEL: f32u32_1:
; CHECK:         .cfi_startproc
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     r7 = ##-2147483648
; CHECK-NEXT:     r3:2 = combine(#8,#1)
; CHECK-NEXT:     v0 = vmem(r0+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2 = vsplat(r7)
; CHECK-NEXT:     v3.w = vasl(v0.w,r2)
; CHECK-NEXT:     r6 = #30
; CHECK-NEXT:     r5 = #24
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v4 = vsplat(r6)
; CHECK-NEXT:     r4 = #32
; CHECK-NEXT:     v6.w = vasl(v0.w,r3)
; CHECK-NEXT:     v3.w = vsub(v3.w,v2.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v5 = vsplat(r4)
; CHECK-NEXT:     v2 = vor(v6,v2)
; CHECK-NEXT:     v1 = vxor(v1,v1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     r3 = ##2147483647
; CHECK-NEXT:     r2 = #64
; CHECK-NEXT:     v3.w = vasr(v3.w,r5)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v30 = vsplat(r3)
; CHECK-NEXT:     q3 = vsetq(r2)
; CHECK-NEXT:     q1 = vcmp.gt(v1.w,v0.w)
; CHECK-NEXT:     v3.w = vsub(v4.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v3.w = vmin(v3.w,v5.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     q0 = vcmp.gt(v1.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v2.w = vlsr(v2.w,v3.w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v31 = vmux(q0,v30,v2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmux(q1,v1,v31)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     if (q3) vmem(r1+#0) = v0
; CHECK-NEXT:    }
  ; IR under test: fptoui <16 x float> -> <16 x i32>; both input and result are
  ; half a vector wide (widened input and result), so the store is predicated.
  ; CHECK lines are autogenerated by update_llc_test_checks.py.
  %v0 = load <16 x float>, ptr %a0, align 128
  %v1 = fptoui <16 x float> %v0 to <16 x i32>
  store <16 x i32> %v1, ptr %a1, align 128
  ret void
}


attributes #0 = { "target-features"="+v68,+hvxv68,+hvx-length128b,+hvx-qfloat" }