llvm/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s

#include <lasxintrin.h>

// CHECK-LABEL: @xvsll_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); }
// CHECK-LABEL: @xvsll_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); }
// CHECK-LABEL: @xvsll_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); }
// CHECK-LABEL: @xvsll_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); }
// CHECK-LABEL: @xvslli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); }
// CHECK-LABEL: @xvslli_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); }
// CHECK-LABEL: @xvslli_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); }
// CHECK-LABEL: @xvslli_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); }
// CHECK-LABEL: @xvsra_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); }
// CHECK-LABEL: @xvsra_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); }
// CHECK-LABEL: @xvsra_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); }
// CHECK-LABEL: @xvsra_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); }
// CHECK-LABEL: @xvsrai_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); }
// CHECK-LABEL: @xvsrai_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); }
// CHECK-LABEL: @xvsrai_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); }
// CHECK-LABEL: @xvsrai_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); }
// CHECK-LABEL: @xvsrar_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); }
// CHECK-LABEL: @xvsrar_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); }
// CHECK-LABEL: @xvsrar_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); }
// CHECK-LABEL: @xvsrar_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); }
// CHECK-LABEL: @xvsrari_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); }
// CHECK-LABEL: @xvsrari_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); }
// CHECK-LABEL: @xvsrari_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); }
// CHECK-LABEL: @xvsrari_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); }
// CHECK-LABEL: @xvsrl_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); }
// CHECK-LABEL: @xvsrl_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); }
// CHECK-LABEL: @xvsrl_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); }
// CHECK-LABEL: @xvsrl_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); }
// CHECK-LABEL: @xvsrli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); }
// CHECK-LABEL: @xvsrli_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); }
// CHECK-LABEL: @xvsrli_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); }
// CHECK-LABEL: @xvsrli_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); }
// CHECK-LABEL: @xvsrlr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); }
// CHECK-LABEL: @xvsrlr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); }
// CHECK-LABEL: @xvsrlr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); }
// CHECK-LABEL: @xvsrlr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); }
// CHECK-LABEL: @xvsrlri_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); }
// CHECK-LABEL: @xvsrlri_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); }
// CHECK-LABEL: @xvsrlri_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); }
// CHECK-LABEL: @xvsrlri_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); }
// CHECK-LABEL: @xvbitclr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); }
// CHECK-LABEL: @xvbitclr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); }
// CHECK-LABEL: @xvbitclr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); }
// CHECK-LABEL: @xvbitclr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); }
// CHECK-LABEL: @xvbitclri_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); }
// CHECK-LABEL: @xvbitclri_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); }
// CHECK-LABEL: @xvbitclri_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); }
// CHECK-LABEL: @xvbitclri_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); }
// CHECK-LABEL: @xvbitset_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); }
// CHECK-LABEL: @xvbitset_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); }
// CHECK-LABEL: @xvbitset_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); }
// CHECK-LABEL: @xvbitset_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); }
// CHECK-LABEL: @xvbitseti_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); }
// CHECK-LABEL: @xvbitseti_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); }
// CHECK-LABEL: @xvbitseti_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); }
// CHECK-LABEL: @xvbitseti_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); }
// CHECK-LABEL: @xvbitrev_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); }
// CHECK-LABEL: @xvbitrev_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); }
// CHECK-LABEL: @xvbitrev_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); }
// CHECK-LABEL: @xvbitrev_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); }
// CHECK-LABEL: @xvbitrevi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); }
// CHECK-LABEL: @xvbitrevi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); }
// CHECK-LABEL: @xvbitrevi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); }
// CHECK-LABEL: @xvbitrevi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); }
// CHECK-LABEL: @xvadd_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); }
// CHECK-LABEL: @xvadd_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); }
// CHECK-LABEL: @xvadd_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); }
// CHECK-LABEL: @xvadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); }
// CHECK-LABEL: @xvaddi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); }
// CHECK-LABEL: @xvaddi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); }
// CHECK-LABEL: @xvaddi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); }
// CHECK-LABEL: @xvaddi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); }
// CHECK-LABEL: @xvsub_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); }
// CHECK-LABEL: @xvsub_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); }
// CHECK-LABEL: @xvsub_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); }
// CHECK-LABEL: @xvsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); }
// CHECK-LABEL: @xvsubi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); }
// CHECK-LABEL: @xvsubi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); }
// CHECK-LABEL: @xvsubi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); }
// CHECK-LABEL: @xvsubi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); }
// CHECK-LABEL: @xvmax_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); }
// CHECK-LABEL: @xvmax_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); }
// CHECK-LABEL: @xvmax_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); }
// CHECK-LABEL: @xvmax_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); }
// CHECK-LABEL: @xvmaxi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); }
// CHECK-LABEL: @xvmaxi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); }
// CHECK-LABEL: @xvmaxi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); }
// CHECK-LABEL: @xvmaxi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); }
// CHECK-LABEL: @xvmax_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); }
// CHECK-LABEL: @xvmax_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); }
// CHECK-LABEL: @xvmax_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); }
// CHECK-LABEL: @xvmax_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); }
// CHECK-LABEL: @xvmaxi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); }
// CHECK-LABEL: @xvmaxi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); }
// CHECK-LABEL: @xvmaxi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); }
// CHECK-LABEL: @xvmaxi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); }
// CHECK-LABEL: @xvmin_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); }
// CHECK-LABEL: @xvmin_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); }
// CHECK-LABEL: @xvmin_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); }
// CHECK-LABEL: @xvmin_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
// CHECK-LABEL: @xvmini_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
// CHECK-LABEL: @xvmini_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
// CHECK-LABEL: @xvmini_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
// CHECK-LABEL: @xvmini_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
// CHECK-LABEL: @xvmin_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
// CHECK-LABEL: @xvmin_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
// CHECK-LABEL: @xvmin_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
// CHECK-LABEL: @xvmin_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
// CHECK-LABEL: @xvmini_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
// CHECK-LABEL: @xvmini_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
// CHECK-LABEL: @xvmini_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
// CHECK-LABEL: @xvmini_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
// CHECK-LABEL: @xvseq_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
// CHECK-LABEL: @xvseq_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
// CHECK-LABEL: @xvseq_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
// CHECK-LABEL: @xvseq_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
// CHECK-LABEL: @xvseqi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
// CHECK-LABEL: @xvseqi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
// CHECK-LABEL: @xvseqi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
// CHECK-LABEL: @xvseqi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
// CHECK-LABEL: @xvslt_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
// CHECK-LABEL: @xvslt_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
// CHECK-LABEL: @xvslt_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
// CHECK-LABEL: @xvslt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
// CHECK-LABEL: @xvslti_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
// CHECK-LABEL: @xvslti_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
// CHECK-LABEL: @xvslti_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
// CHECK-LABEL: @xvslti_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
// CHECK-LABEL: @xvslt_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
// CHECK-LABEL: @xvslt_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
// CHECK-LABEL: @xvslt_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
// CHECK-LABEL: @xvslt_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
// CHECK-LABEL: @xvslti_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
// CHECK-LABEL: @xvslti_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
// CHECK-LABEL: @xvslti_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
// CHECK-LABEL: @xvslti_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
// CHECK-LABEL: @xvsle_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
// CHECK-LABEL: @xvsle_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
// CHECK-LABEL: @xvsle_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
// CHECK-LABEL: @xvsle_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
// CHECK-LABEL: @xvslei_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
// CHECK-LABEL: @xvslei_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
// CHECK-LABEL: @xvslei_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
// CHECK-LABEL: @xvslei_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
// CHECK-LABEL: @xvsle_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
// CHECK-LABEL: @xvsle_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
// CHECK-LABEL: @xvsle_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
// CHECK-LABEL: @xvsle_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
// CHECK-LABEL: @xvslei_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
// CHECK-LABEL: @xvslei_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
// CHECK-LABEL: @xvslei_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
// CHECK-LABEL: @xvslei_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
// CHECK-LABEL: @xvsat_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
// CHECK-LABEL: @xvsat_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
// CHECK-LABEL: @xvsat_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
// CHECK-LABEL: @xvsat_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
// CHECK-LABEL: @xvsat_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
// CHECK-LABEL: @xvsat_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
// CHECK-LABEL: @xvsat_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
// CHECK-LABEL: @xvsat_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
// CHECK-LABEL: @xvadda_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
// CHECK-LABEL: @xvadda_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
// CHECK-LABEL: @xvadda_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
// CHECK-LABEL: @xvadda_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
// CHECK-LABEL: @xvsadd_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
// CHECK-LABEL: @xvsadd_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
// CHECK-LABEL: @xvsadd_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
// CHECK-LABEL: @xvsadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
// CHECK-LABEL: @xvsadd_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
// CHECK-LABEL: @xvsadd_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
// CHECK-LABEL: @xvsadd_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
// CHECK-LABEL: @xvsadd_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
// CHECK-LABEL: @xvavg_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
// CHECK-LABEL: @xvavg_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
// CHECK-LABEL: @xvavg_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
// CHECK-LABEL: @xvavg_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
// CHECK-LABEL: @xvavg_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
// CHECK-LABEL: @xvavg_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
// CHECK-LABEL: @xvavg_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
// CHECK-LABEL: @xvavg_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
// CHECK-LABEL: @xvavgr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
// CHECK-LABEL: @xvavgr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
// CHECK-LABEL: @xvavgr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
// CHECK-LABEL: @xvavgr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
// CHECK-LABEL: @xvavgr_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
// CHECK-LABEL: @xvavgr_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
// CHECK-LABEL: @xvavgr_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
// CHECK-LABEL: @xvavgr_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
// CHECK-LABEL: @xvssub_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
// CHECK-LABEL: @xvssub_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
// CHECK-LABEL: @xvssub_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
// CHECK-LABEL: @xvssub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
// CHECK-LABEL: @xvssub_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
// CHECK-LABEL: @xvssub_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
// CHECK-LABEL: @xvssub_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
// CHECK-LABEL: @xvssub_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
// CHECK-LABEL: @xvabsd_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
// CHECK-LABEL: @xvabsd_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
// CHECK-LABEL: @xvabsd_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
// CHECK-LABEL: @xvabsd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
// CHECK-LABEL: @xvabsd_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
// CHECK-LABEL: @xvabsd_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
// CHECK-LABEL: @xvabsd_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
// CHECK-LABEL: @xvabsd_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
// CHECK-LABEL: @xvmul_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
// CHECK-LABEL: @xvmul_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
// CHECK-LABEL: @xvmul_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
// CHECK-LABEL: @xvmul_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
// CHECK-LABEL: @xvmadd_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
// CHECK-LABEL: @xvmadd_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
// CHECK-LABEL: @xvmadd_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
// CHECK-LABEL: @xvmadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
// CHECK-LABEL: @xvdiv_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
// CHECK-LABEL: @xvdiv_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
// CHECK-LABEL: @xvdiv_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
// CHECK-LABEL: @xvdiv_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
// CHECK-LABEL: @xvdiv_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
// CHECK-LABEL: @xvdiv_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
// CHECK-LABEL: @xvdiv_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
// CHECK-LABEL: @xvdiv_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
// CHECK-LABEL: @xvhaddw_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
// CHECK-LABEL: @xvhaddw_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
// CHECK-LABEL: @xvhaddw_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
// CHECK-LABEL: @xvhaddw_hu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
// CHECK-LABEL: @xvhaddw_wu_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
// CHECK-LABEL: @xvhaddw_du_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
// CHECK-LABEL: @xvhsubw_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
// CHECK-LABEL: @xvhsubw_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
// CHECK-LABEL: @xvhsubw_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
// CHECK-LABEL: @xvhsubw_hu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
// CHECK-LABEL: @xvhsubw_wu_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); }
// CHECK-LABEL: @xvhsubw_du_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); }
// CHECK-LABEL: @xvmod_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); }
// CHECK-LABEL: @xvmod_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); }
// CHECK-LABEL: @xvmod_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); }
// CHECK-LABEL: @xvmod_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); }
// CHECK-LABEL: @xvmod_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); }
// CHECK-LABEL: @xvmod_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); }
// CHECK-LABEL: @xvmod_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); }
// CHECK-LABEL: @xvmod_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); }
// CHECK-LABEL: @xvrepl128vei_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); }
// CHECK-LABEL: @xvpickev_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); }
// CHECK-LABEL: @xvpickev_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); }
// CHECK-LABEL: @xvpickev_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); }
// CHECK-LABEL: @xvpickev_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); }
// CHECK-LABEL: @xvpickod_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); }
// CHECK-LABEL: @xvpickod_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); }
// CHECK-LABEL: @xvpickod_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); }
// CHECK-LABEL: @xvpickod_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); }
// CHECK-LABEL: @xvilvh_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); }
// CHECK-LABEL: @xvilvh_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); }
// CHECK-LABEL: @xvilvh_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); }
// CHECK-LABEL: @xvilvh_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); }
// CHECK-LABEL: @xvilvl_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); }
// CHECK-LABEL: @xvilvl_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); }
// CHECK-LABEL: @xvilvl_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
// CHECK-LABEL: @xvilvl_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
// CHECK-LABEL: @xvpackev_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
// CHECK-LABEL: @xvpackev_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
// CHECK-LABEL: @xvpackev_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
// CHECK-LABEL: @xvpackev_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
// CHECK-LABEL: @xvpackod_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
// CHECK-LABEL: @xvpackod_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
// CHECK-LABEL: @xvpackod_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
// CHECK-LABEL: @xvpackod_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
// CHECK-LABEL: @xvshuf_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
// CHECK-LABEL: @xvand_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
// CHECK-LABEL: @xvandi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
// CHECK-LABEL: @xvor_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
// CHECK-LABEL: @xvori_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
// CHECK-LABEL: @xvnor_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
// CHECK-LABEL: @xvnori_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
// CHECK-LABEL: @xvxor_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
// CHECK-LABEL: @xvxori_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
// CHECK-LABEL: @xvbitsel_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
// CHECK-LABEL: @xvbitseli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
// CHECK-LABEL: @xvshuf4i_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
// CHECK-LABEL: @xvshuf4i_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
// CHECK-LABEL: @xvshuf4i_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
// CHECK-LABEL: @xvreplgr2vr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
// CHECK-LABEL: @xvreplgr2vr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
// CHECK-NEXT:    store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
// CHECK-LABEL: @xvreplgr2vr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
// CHECK-LABEL: @xvreplgr2vr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
// CHECK-NEXT:    store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
// CHECK-LABEL: @xvpcnt_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
// CHECK-LABEL: @xvpcnt_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
// CHECK-LABEL: @xvpcnt_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
// CHECK-LABEL: @xvpcnt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
// CHECK-LABEL: @xvclo_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
// CHECK-LABEL: @xvclo_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
// CHECK-LABEL: @xvclo_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
// CHECK-LABEL: @xvclo_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
// CHECK-LABEL: @xvclz_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
// CHECK-LABEL: @xvclz_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
// CHECK-LABEL: @xvclz_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
// CHECK-LABEL: @xvclz_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
// CHECK-LABEL: @xvfadd_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
// CHECK-LABEL: @xvfadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
// CHECK-LABEL: @xvfsub_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
// CHECK-LABEL: @xvfsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
// CHECK-LABEL: @xvfmul_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
// CHECK-LABEL: @xvfmul_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); }
// CHECK-LABEL: @xvfdiv_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); }
// CHECK-LABEL: @xvfdiv_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); }
// CHECK-LABEL: @xvfcvt_h_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); }
// CHECK-LABEL: @xvfcvt_s_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); }
// CHECK-LABEL: @xvfmin_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); }
// CHECK-LABEL: @xvfmin_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); }
// CHECK-LABEL: @xvfmina_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); }
// CHECK-LABEL: @xvfmina_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); }
// CHECK-LABEL: @xvfmax_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); }
// CHECK-LABEL: @xvfmax_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); }
// CHECK-LABEL: @xvfmaxa_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); }
// CHECK-LABEL: @xvfmaxa_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); }
// CHECK-LABEL: @xvfclass_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); }
// CHECK-LABEL: @xvfclass_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); }
// CHECK-LABEL: @xvfsqrt_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); }
// CHECK-LABEL: @xvfsqrt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); }
// CHECK-LABEL: @xvfrecip_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); }
// CHECK-LABEL: @xvfrecip_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); }
// CHECK-LABEL: @xvfrint_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); }
// CHECK-LABEL: @xvfrint_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); }
// CHECK-LABEL: @xvfrsqrt_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); }
// CHECK-LABEL: @xvfrsqrt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
// CHECK-LABEL: @xvflogb_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
// CHECK-LABEL: @xvflogb_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
// CHECK-LABEL: @xvfcvth_s_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); }
// CHECK-LABEL: @xvfcvth_d_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); }
// CHECK-LABEL: @xvfcvtl_s_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); }
// CHECK-LABEL: @xvfcvtl_d_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); }
// CHECK-LABEL: @xvftint_w_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); }
// CHECK-LABEL: @xvftint_l_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); }
// CHECK-LABEL: @xvftint_wu_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); }
// CHECK-LABEL: @xvftint_lu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); }
// CHECK-LABEL: @xvftintrz_w_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); }
// CHECK-LABEL: @xvftintrz_l_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); }
// CHECK-LABEL: @xvftintrz_wu_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); }
// CHECK-LABEL: @xvftintrz_lu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); }
// CHECK-LABEL: @xvffint_s_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); }
// CHECK-LABEL: @xvffint_d_l(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); }
// CHECK-LABEL: @xvffint_s_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); }
// CHECK-LABEL: @xvffint_d_lu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); }
// CHECK-LABEL: @xvreplve_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); }
// CHECK-LABEL: @xvreplve_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); }
// CHECK-LABEL: @xvreplve_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); }
// CHECK-LABEL: @xvreplve_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); }
// CHECK-LABEL: @xvpermi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); }
// CHECK-LABEL: @xvandn_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); }
// CHECK-LABEL: @xvneg_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); }
// CHECK-LABEL: @xvneg_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); }
// CHECK-LABEL: @xvneg_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); }
// CHECK-LABEL: @xvneg_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); }
// CHECK-LABEL: @xvmuh_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); }
// CHECK-LABEL: @xvmuh_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); }
// CHECK-LABEL: @xvmuh_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); }
// CHECK-LABEL: @xvmuh_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); }
// CHECK-LABEL: @xvmuh_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); }
// CHECK-LABEL: @xvmuh_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); }
// CHECK-LABEL: @xvmuh_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); }
// CHECK-LABEL: @xvmuh_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); }
// CHECK-LABEL: @xvsllwil_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); }
// CHECK-LABEL: @xvsllwil_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); }
// CHECK-LABEL: @xvsllwil_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); }
// CHECK-LABEL: @xvsllwil_hu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); }
// CHECK-LABEL: @xvsllwil_wu_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); }
// CHECK-LABEL: @xvsllwil_du_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); }
// CHECK-LABEL: @xvsran_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); }
// CHECK-LABEL: @xvsran_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); }
// CHECK-LABEL: @xvsran_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); }
// CHECK-LABEL: @xvssran_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); }
// CHECK-LABEL: @xvssran_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); }
// CHECK-LABEL: @xvssran_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); }
// CHECK-LABEL: @xvssran_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); }
// CHECK-LABEL: @xvssran_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); }
// CHECK-LABEL: @xvssran_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); }
// CHECK-LABEL: @xvsrarn_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); }
// CHECK-LABEL: @xvsrarn_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); }
// CHECK-LABEL: @xvsrarn_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); }
// CHECK-LABEL: @xvssrarn_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); }
// CHECK-LABEL: @xvssrarn_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); }
// CHECK-LABEL: @xvssrarn_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); }
// CHECK-LABEL: @xvssrarn_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); }
// CHECK-LABEL: @xvssrarn_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); }
// CHECK-LABEL: @xvssrarn_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); }
// CHECK-LABEL: @xvsrln_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); }
// CHECK-LABEL: @xvsrln_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); }
// CHECK-LABEL: @xvsrln_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); }
// CHECK-LABEL: @xvssrln_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); }
// CHECK-LABEL: @xvssrln_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); }
// CHECK-LABEL: @xvssrln_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); }
// CHECK-LABEL: @xvsrlrn_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); }
// CHECK-LABEL: @xvsrlrn_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); }
// CHECK-LABEL: @xvsrlrn_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); }
// CHECK-LABEL: @xvssrlrn_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); }
// CHECK-LABEL: @xvssrlrn_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); }
// CHECK-LABEL: @xvssrlrn_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); }
// CHECK-LABEL: @xvfrstpi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); }
// CHECK-LABEL: @xvfrstpi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); }
// CHECK-LABEL: @xvfrstp_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); }
// CHECK-LABEL: @xvfrstp_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); }
// CHECK-LABEL: @xvshuf4i_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); }
// CHECK-LABEL: @xvbsrl_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); }
// CHECK-LABEL: @xvbsll_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); }
// CHECK-LABEL: @xvextrins_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); }
// CHECK-LABEL: @xvextrins_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); }
// CHECK-LABEL: @xvextrins_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); }
// CHECK-LABEL: @xvextrins_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); }
// CHECK-LABEL: @xvmskltz_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); }
// CHECK-LABEL: @xvmskltz_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); }
// CHECK-LABEL: @xvmskltz_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); }
// CHECK-LABEL: @xvmskltz_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); }
// CHECK-LABEL: @xvsigncov_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); }
// CHECK-LABEL: @xvsigncov_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); }
// CHECK-LABEL: @xvsigncov_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); }
// CHECK-LABEL: @xvsigncov_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); }
// CHECK-LABEL: @xvfmadd_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); }
// CHECK-LABEL: @xvfmadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT:    store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvfmsub_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); }
// CHECK-LABEL: @xvfmsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT:    store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); }
// CHECK-LABEL: @xvfnmadd_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); }
// CHECK-LABEL: @xvfnmadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT:    store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvfnmsub_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); }
// CHECK-LABEL: @xvfnmsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT:    store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); }
// CHECK-LABEL: @xvftintrne_w_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); }
// CHECK-LABEL: @xvftintrne_l_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); }
// CHECK-LABEL: @xvftintrp_w_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); }
// CHECK-LABEL: @xvftintrp_l_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); }
// CHECK-LABEL: @xvftintrm_w_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); }
// CHECK-LABEL: @xvftintrm_l_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); }
// CHECK-LABEL: @xvftint_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); }
// CHECK-LABEL: @xvffint_s_l(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); }
// CHECK-LABEL: @xvftintrz_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrp_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrm_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrne_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); }
// CHECK-LABEL: @xvftinth_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); }
// CHECK-LABEL: @xvftintl_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); }
// CHECK-LABEL: @xvffinth_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); }
// CHECK-LABEL: @xvffintl_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); }
// CHECK-LABEL: @xvftintrzh_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); }
// CHECK-LABEL: @xvftintrzl_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); }
// CHECK-LABEL: @xvftintrph_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); }
// CHECK-LABEL: @xvftintrpl_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); }
// CHECK-LABEL: @xvftintrmh_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); }
// CHECK-LABEL: @xvftintrml_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); }
// CHECK-LABEL: @xvftintrneh_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); }
// CHECK-LABEL: @xvftintrnel_l_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); }
// CHECK-LABEL: @xvfrintrne_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); }
// CHECK-LABEL: @xvfrintrne_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); }
// CHECK-LABEL: @xvfrintrz_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); }
// CHECK-LABEL: @xvfrintrz_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); }
// CHECK-LABEL: @xvfrintrp_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); }
// CHECK-LABEL: @xvfrintrp_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); }
// CHECK-LABEL: @xvfrintrm_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); }
// CHECK-LABEL: @xvfrintrm_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); }
// CHECK-LABEL: @xvld(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); }
// CHECK-LABEL: @xvst(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
// CHECK-NEXT:    ret void
//
void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); }
// CHECK-LABEL: @xvstelm_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
// CHECK-NEXT:    ret void
//
void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); }
// CHECK-LABEL: @xvstelm_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
// CHECK-NEXT:    ret void
//
void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); }
// CHECK-LABEL: @xvstelm_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
// CHECK-NEXT:    ret void
//
void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); }
// CHECK-LABEL: @xvstelm_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
// CHECK-NEXT:    ret void
//
void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); }
// CHECK-LABEL: @xvinsve0_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); }
// CHECK-LABEL: @xvinsve0_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); }
// CHECK-LABEL: @xvpickve_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); }
// CHECK-LABEL: @xvpickve_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); }
// CHECK-LABEL: @xvssrlrn_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); }
// CHECK-LABEL: @xvssrlrn_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); }
// CHECK-LABEL: @xvssrlrn_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); }
// CHECK-LABEL: @xvssrln_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); }
// CHECK-LABEL: @xvssrln_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); }
// CHECK-LABEL: @xvssrln_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); }
// CHECK-LABEL: @xvorn_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); }
// CHECK-LABEL: @xvldi(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvldi() { return __lasx_xvldi(1); }
// CHECK-LABEL: @xvldx(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]]
// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); }
// CHECK-LABEL: @xvstx(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1)
// CHECK-NEXT:    ret void
//
void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); }
// CHECK-LABEL: @xvextl_qu_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); }
// CHECK-LABEL: @xvinsgr2vr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); }
// CHECK-LABEL: @xvinsgr2vr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); }
// CHECK-LABEL: @xvreplve0_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); }
// CHECK-LABEL: @xvreplve0_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); }
// CHECK-LABEL: @xvreplve0_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); }
// CHECK-LABEL: @xvreplve0_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); }
// CHECK-LABEL: @xvreplve0_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); }
// CHECK-LABEL: @vext2xv_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); }
// CHECK-LABEL: @vext2xv_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); }
// CHECK-LABEL: @vext2xv_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); }
// CHECK-LABEL: @vext2xv_w_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); }
// CHECK-LABEL: @vext2xv_d_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); }
// CHECK-LABEL: @vext2xv_d_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); }
// CHECK-LABEL: @vext2xv_hu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); }
// CHECK-LABEL: @vext2xv_wu_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); }
// CHECK-LABEL: @vext2xv_du_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); }
// CHECK-LABEL: @vext2xv_wu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); }
// CHECK-LABEL: @vext2xv_du_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); }
// CHECK-LABEL: @vext2xv_du_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); }
// CHECK-LABEL: @xvpermi_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); }
// CHECK-LABEL: @xvpermi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); }
// CHECK-LABEL: @xvperm_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); }
// CHECK-LABEL: @xvldrepl_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); }
// CHECK-LABEL: @xvldrepl_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
// CHECK-NEXT:    store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); }
// CHECK-LABEL: @xvldrepl_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); }
// CHECK-LABEL: @xvldrepl_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
// CHECK-NEXT:    store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); }
// CHECK-LABEL: @xvpickve2gr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    ret i32 [[TMP1]]
//
unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    ret i64 [[TMP1]]
//
long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    ret i64 [[TMP1]]
//
unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); }
// CHECK-LABEL: @xvaddwev_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); }
// CHECK-LABEL: @xvaddwev_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); }
// CHECK-LABEL: @xvaddwev_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); }
// CHECK-LABEL: @xvaddwev_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); }
// CHECK-LABEL: @xvaddwev_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); }
// CHECK-LABEL: @xvaddwev_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); }
// CHECK-LABEL: @xvaddwev_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); }
// CHECK-LABEL: @xvaddwev_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); }
// CHECK-LABEL: @xvsubwev_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); }
// CHECK-LABEL: @xvsubwev_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); }
// CHECK-LABEL: @xvsubwev_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); }
// CHECK-LABEL: @xvsubwev_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); }
// CHECK-LABEL: @xvsubwev_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); }
// CHECK-LABEL: @xvsubwev_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); }
// CHECK-LABEL: @xvsubwev_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); }
// CHECK-LABEL: @xvsubwev_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); }
// CHECK-LABEL: @xvaddwod_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); }
// CHECK-LABEL: @xvaddwod_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); }
// CHECK-LABEL: @xvsubwod_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); }
// CHECK-LABEL: @xvsubwod_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); }
// CHECK-LABEL: @xvsubwod_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); }
// CHECK-LABEL: @xvsubwod_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); }
// CHECK-LABEL: @xvsubwod_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); }
// CHECK-LABEL: @xvsubwod_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvsubwod_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvsubwod_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); }
// CHECK-LABEL: @xvmulwod_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); }
// CHECK-LABEL: @xvmulwod_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); }
// CHECK-LABEL: @xvaddwev_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvaddwev_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvaddwev_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvhaddw_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); }
// CHECK-LABEL: @xvhaddw_qu_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); }
// CHECK-LABEL: @xvhsubw_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); }
// CHECK-LABEL: @xvhsubw_qu_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); }
// CHECK-LABEL: @xvmaddwev_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_wu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_hu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_bu_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
// CHECK-LABEL: @xvrotr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); }
// CHECK-LABEL: @xvrotr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); }
// CHECK-LABEL: @xvrotr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); }
// CHECK-LABEL: @xvrotr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); }
// CHECK-LABEL: @xvadd_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); }
// CHECK-LABEL: @xvsub_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); }
// CHECK-LABEL: @xvaddwev_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); }
// CHECK-LABEL: @xvaddwod_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); }
// CHECK-LABEL: @xvmulwod_q_du_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); }
// CHECK-LABEL: @xvmskgez_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); }
// CHECK-LABEL: @xvmsknz_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); }
// CHECK-LABEL: @xvexth_h_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); }
// CHECK-LABEL: @xvexth_w_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); }
// CHECK-LABEL: @xvexth_d_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); }
// CHECK-LABEL: @xvexth_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); }
// CHECK-LABEL: @xvexth_hu_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]])
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); }
// CHECK-LABEL: @xvexth_wu_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]])
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); }
// CHECK-LABEL: @xvexth_du_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); }
// CHECK-LABEL: @xvexth_qu_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); }
// CHECK-LABEL: @xvrotri_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); }
// CHECK-LABEL: @xvrotri_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); }
// CHECK-LABEL: @xvrotri_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); }
// CHECK-LABEL: @xvrotri_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); }
// CHECK-LABEL: @xvextl_q_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); }
// CHECK-LABEL: @xvsrlni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvsrlrni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvsrlrni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvsrlrni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvsrlrni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrlni_du_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrlrni_du_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); }
// CHECK-LABEL: @xvsrani_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvsrani_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvsrani_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvsrani_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvsrarni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvsrarni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvsrarni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvsrarni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrani_du_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_b_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_h_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_w_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_d_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_bu_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_hu_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_wu_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); }
// CHECK-LABEL: @xvssrarni_du_q(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); }
// CHECK-LABEL: @xbnz_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); }
// CHECK-LABEL: @xbnz_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); }
// CHECK-LABEL: @xbnz_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); }
// CHECK-LABEL: @xbnz_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); }
// CHECK-LABEL: @xbnz_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); }
// CHECK-LABEL: @xbz_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); }
// CHECK-LABEL: @xbz_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); }
// CHECK-LABEL: @xbz_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); }
// CHECK-LABEL: @xbz_v(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); }
// CHECK-LABEL: @xbz_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); }
// CHECK-LABEL: @xvfcmp_caf_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_caf_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_ceq_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_ceq_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cle_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cle_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_clt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_clt_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cne_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cne_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cor_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cor_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cueq_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cueq_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cule_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cule_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cult_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cult_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cun_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cune_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_cune_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_cun_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_saf_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_saf_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_seq_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_seq_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sle_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sle_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_slt_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_slt_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sne_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sne_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sor_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sor_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sueq_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sueq_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sule_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sule_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sult_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sult_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sun_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sune_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); }
// CHECK-LABEL: @xvfcmp_sune_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); }
// CHECK-LABEL: @xvfcmp_sun_s(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); }
// CHECK-LABEL: @xvpickve_d_f(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1)
// CHECK-NEXT:    store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); }
// CHECK-LABEL: @xvpickve_w_f(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1)
// CHECK-NEXT:    store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); }
// CHECK-LABEL: @xvrepli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
// CHECK-NEXT:    store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); }
// CHECK-LABEL: @xvrepli_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
// CHECK-NEXT:    store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); }
// CHECK-LABEL: @xvrepli_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
// CHECK-NEXT:    store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
// CHECK-LABEL: @xvrepli_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
// CHECK-NEXT:    store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }