llvm/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; SABA
;

define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: saba_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: saba_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: saba_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: saba_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SHADD
;

define <vscale x 16 x i8> @shadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SHSUB
;

define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SHSUBR
;

define <vscale x 16 x i8> @shsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: shsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @shsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @shsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SLI
;

define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sli_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.b, z1.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sli_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.h, z1.h, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sli_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.s, z1.s, #30
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 30);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sli_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sli z0.d, z1.d, #63
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 63)
  ret <vscale x 2 x i64> %out
}

;
; SQABS
;

define <vscale x 16 x i8> @sqabs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqabs_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqabs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqabs_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqabs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqabs_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqabs_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQADD
;

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULH (Vector)
;

define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULH (Indexed)
;

define <vscale x 8 x i16> @sqdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h[7]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                        <vscale x 8 x i16> %b,
                                                                        i32 7)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b,
                                                                        i32 3);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                        <vscale x 2 x i64> %b,
                                                                        i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQNEG
;

define <vscale x 16 x i8> @sqneg_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqneg_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqneg_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqneg_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqneg_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqneg_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqneg_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqneg_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMALH (Vectors)
;

define <vscale x 16 x i8> @sqrdmlah_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlah_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlah_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMALH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlah_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlah_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.h, z1.h, z2.h[5]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         <vscale x 8 x i16> %c,
                                                                         i32 5)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlah_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlah_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.s, z1.s, z2.s[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         <vscale x 4 x i32> %c,
                                                                         i32 1);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlah_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlah_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlah z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                         <vscale x 2 x i64> %b,
                                                                         <vscale x 2 x i64> %c,
                                                                         i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMSLH (Vectors)
;

define <vscale x 16 x i8> @sqrdmlsh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdmlsh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmlsh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMSLH (Indexed)
;

define <vscale x 8 x i16> @sqrdmlsh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.h, z1.h, z2.h[4]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         <vscale x 8 x i16> %c,
                                                                         i32 4)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmlsh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.s, z1.s, z2.s[0]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         <vscale x 4 x i32> %c,
                                                                         i32 0);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmlsh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdmlsh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmlsh z0.d, z1.d, z2.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                         <vscale x 2 x i64> %b,
                                                                         <vscale x 2 x i64> %c,
                                                                         i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMULH (Vectors)
;

define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrdmulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRDMULH (Indexed)
;

define <vscale x 8 x i16> @sqrdmulh_lane_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h[6]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 6)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdmulh_lane_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 2);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64> %a,
                                                                         <vscale x 2 x i64> %b,
                                                                         i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQRSHL
;

define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQRSHLR
;

define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqrshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqrshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqrshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqrshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SQSHL (Vectors)
;

define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSHLR
;

define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SQSHL (Scalar)
;

define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshl_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshl_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshl_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshl_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @sqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshl_n_i8_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    sqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshl_n_i16_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    sqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshl_n_i32_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    sqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshl_n_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    sqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

;
; SQSHLU
;

define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sqshlu_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.b, p0/m, z0.b, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  i32 2)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshlu_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.h, p0/m, z0.h, #3
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  i32 3)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshlu_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.s, p0/m, z0.s, #29
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  i32 29)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshlu_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqshlu z0.d, p0/m, z0.d, #62
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  i32 62)
  ret <vscale x 2 x i64> %out
}

;
; SQSUB
;

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSUBR
;

define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRHADD
;

define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRI
;

define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sri_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sri_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.h, z1.h, #16
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 16)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sri_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.s, z1.s, #32
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 32);
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sri_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sri z0.d, z1.d, #64
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 64)
  ret <vscale x 2 x i64> %out
}

;
; SRSHL
;

define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SRSHLR
;

define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    srshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    srshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    srshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    srshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; SRSHR
;

define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: srshr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 i32 8)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: srshr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.h, p0/m, z0.h, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 i32 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: srshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.s, p0/m, z0.s, #22
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 i32 22)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: srshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srshr z0.d, p0/m, z0.d, #54
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 i32 54)
  ret <vscale x 2 x i64> %out
}

;
; SRSRA
;

define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: srsra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 2)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: srsra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.h, z1.h, #15
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 15)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: srsra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.s, z1.s, #12
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 12)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: srsra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srsra z0.d, z1.d, #44
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 44)
  ret <vscale x 2 x i64> %out
}

;
; SSRA
;

define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.b, z1.b, #3
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 3)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.h, z1.h, #14
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 14)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.s, z1.s, #2
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ssra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ssra z0.d, z1.d, #34
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 34)
  ret <vscale x 2 x i64> %out
}

;
; SUQADD
;

define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: suqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    suqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: suqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    suqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: suqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    suqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: suqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    suqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UABA
;

define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uaba_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uaba_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uaba_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: uaba_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %out
}

;
; UHADD
;

define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UHSUB
;

define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uhsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uhsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uhsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uhsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UHSUBR
;

define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uhsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uhsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uhsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uhsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQADD
;

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQRSHL
;

define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqrshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqrshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqrshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQRSHLR
;

define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqrshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqrshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqrshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqrshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqrshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqrshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqrshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqrshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; UQSHL (Vectors)
;

define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSHLR
;

define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; UQSHL (Scalar)
;

define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 7)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 15)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 63)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqshl_n_i8_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: uqshl_n_i8_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    uqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %dup = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 8)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %dup)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqshl_n_i16_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshl_n_i16_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %dup = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %dup)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqshl_n_i32_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshl_n_i32_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %dup = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 32)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %dup)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqshl_n_i64_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshl_n_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 64)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %dup)
  ret <vscale x 2 x i64> %out
}

;
; UQSUB
;

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSUBR
;

define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsubr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsubr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsubr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsubr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URECPE
;

define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urecpe_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; URHADD
;

define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urhadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urhadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urhadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urhadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                             <vscale x 2 x i64> %a,
                                                             <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URSHL
;

define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; URSHLR
;

define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: urshlr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    urshlr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %b,
                                                                 <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: urshlr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    urshlr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %b,
                                                                 <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: urshlr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    urshlr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %b,
                                                                 <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    urshlr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: urshlr_i64_noptrue:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshl z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b,
                                                                 <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

;
; URSHR
;

define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: urshr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.b, p0/m, z0.b, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 i32 4)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: urshr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.h, p0/m, z0.h, #13
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 i32 13)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: urshr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.s, p0/m, z0.s, #1
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: urshr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urshr z0.d, p0/m, z0.d, #24
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 i32 24)
  ret <vscale x 2 x i64> %out
}

;
; URSQRTE
;

define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursqrte_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursqrte z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; URSRA
;

define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ursra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.b, z1.b, #5
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
                                                                 i32 5)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ursra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.h, z1.h, #12
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
                                                                 i32 12)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ursra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.s, z1.s, #31
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
                                                                 i32 31)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ursra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ursra z0.d, z1.d, #14
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
                                                                 i32 14)
  ret <vscale x 2 x i64> %out
}

;
; USQADD
;

define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usqadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; USRA
;

define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usra_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.b, z1.b, #6
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b,
                                                                i32 6)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usra_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.h, z1.h, #11
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b,
                                                                i32 11)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usra_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.s, z1.s, #21
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b,
                                                                i32 21)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: usra_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    usra z0.d, z1.d, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b,
                                                                i32 4)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqneg.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlah.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlah.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlah.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlah.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmlsh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)