// llvm/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c

// REQUIRES: powerpc-registered-target

// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns

// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-LE

// RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns

// RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
// RUN: %clang -x c++ -fsyntax-only -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
// RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
// RUN:   -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE

#include <xmmintrin.h>

// Shared operands and result slots used by every driver function below.
// 128-bit vector result and the two vector inputs.
__m128 res;
__m128 m1;
__m128 m2;
// 64-bit (MMX-style) result and a pair of 64-bit inputs.
__m64 res64;
__m64 ms[2];
// Scalar float buffer used by the load tests and as a float result slot.
float fs[4];
// Integer result / input and a second integer used as a runtime selector.
int i;
int i2;
// 64-bit integer result / input for the 64-bit conversion intrinsics.
long long i64;

// CHECK-LE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
// CHECK-BE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2

// CHECK-LE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
// CHECK-BE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4

// Driver for the single-precision add intrinsics: the packed form (_ps)
// followed by the scalar form (_ss), both on the global operands m1 and m2.
// The calls are kept in this order because the FileCheck directives that
// follow match the intrinsic definitions in the same sequence.
void __attribute__((noinline))
test_add() {
  res = _mm_add_ps(m1, m2);
  res = _mm_add_ss(m1, m2);
}

// CHECK-LABEL: @test_add

// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ps
// CHECK: fadd <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_add_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fadd <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for the 64-bit unsigned average intrinsics, first on 16-bit elements
// (_pu16) and then on 8-bit elements (_pu8). Inputs are the two entries of ms
// and the result is stored in res64.
void __attribute__((noinline))
test_avg() {
  res64 = _mm_avg_pu16(ms[0], ms[1]);
  res64 = _mm_avg_pu8(ms[0], ms[1]);
}

// CHECK-LABEL: @test_avg

// CHECK-LABEL: define available_externally i64 @_mm_avg_pu16
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_avg_pu8
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// Driver for the alternate spellings of the average intrinsics. The directives
// that follow expect _m_pavgw to call _mm_avg_pu16 and _m_pavgb to call
// _mm_avg_pu8.
void __attribute__((noinline))
test_alt_name_avg() {
  res64 = _m_pavgw(ms[0], ms[1]);
  res64 = _m_pavgb(ms[0], ms[1]);
}

// CHECK-LABEL: @test_alt_name_avg

// CHECK-LABEL: define available_externally i64 @_m_pavgw
// CHECK: call i64 @_mm_avg_pu16

// CHECK-LABEL: define available_externally i64 @_m_pavgb
// CHECK: call i64 @_mm_avg_pu8

// Driver for the single-precision compare intrinsics. Each predicate is called
// in its packed (_ps) form and then its scalar (_ss) form, in this order:
// eq, ge, gt, le, lt, neq, nge, ngt, nle, nlt, ord, unord.
// The order matters: the FileCheck directives that follow walk the intrinsic
// definitions in exactly this sequence.
void __attribute__((noinline))
test_cmp() {
  res = _mm_cmpeq_ps(m1, m2);
  res = _mm_cmpeq_ss(m1, m2);
  res = _mm_cmpge_ps(m1, m2);
  res = _mm_cmpge_ss(m1, m2);
  res = _mm_cmpgt_ps(m1, m2);
  res = _mm_cmpgt_ss(m1, m2);
  res = _mm_cmple_ps(m1, m2);
  res = _mm_cmple_ss(m1, m2);
  res = _mm_cmplt_ps(m1, m2);
  res = _mm_cmplt_ss(m1, m2);
  res = _mm_cmpneq_ps(m1, m2);
  res = _mm_cmpneq_ss(m1, m2);
  res = _mm_cmpnge_ps(m1, m2);
  res = _mm_cmpnge_ss(m1, m2);
  res = _mm_cmpngt_ps(m1, m2);
  res = _mm_cmpngt_ss(m1, m2);
  res = _mm_cmpnle_ps(m1, m2);
  res = _mm_cmpnle_ss(m1, m2);
  res = _mm_cmpnlt_ps(m1, m2);
  res = _mm_cmpnlt_ss(m1, m2);
  res = _mm_cmpord_ps(m1, m2);
  res = _mm_cmpord_ss(m1, m2);
  res = _mm_cmpunord_ps(m1, m2);
  res = _mm_cmpunord_ss(m1, m2);
}

// CHECK-LABEL: @test_cmp

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ps
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ps
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ps
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ps
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ps
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ps
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ps
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ps
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps
// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x float> @vec_abs(float vector[4])
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
// CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for the scalar _mm_comi*_ss compare intrinsics (eq, ge, gt, le, lt,
// neq). Each call compares m1 with m2 and stores the integer result in i.
// The directives that follow expect an fcmp on element 0 of each operand,
// widened to i32.
void __attribute__((noinline))
test_comi() {
  i = _mm_comieq_ss(m1, m2);
  i = _mm_comige_ss(m1, m2);
  i = _mm_comigt_ss(m1, m2);
  i = _mm_comile_ss(m1, m2);
  i = _mm_comilt_ss(m1, m2);
  i = _mm_comineq_ss(m1, m2);
}

// CHECK-LABEL: @test_comi

// CHECK-LABEL: define available_externally signext i32 @_mm_comieq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comige_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comigt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comile_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comilt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_comineq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une float %[[VAL1]], %[[VAL2]]
// CHECK: zext i1 %[[CMP]] to i32

// Driver for the conversion intrinsics between packed/scalar floats and
// integers: the _mm_cvt_* and _mm_cvtt_* short names, the _mm_cvtp{i,u}*_ps
// integer-to-float forms, the _mm_cvtps_pi* float-to-integer forms, and the
// scalar _mm_cvtsi*_ss / _mm_cvtss_* / _mm_cvttss_* forms.
// Results land in whichever global matches the return type (res, res64, i,
// i64 or fs[0]). The call order mirrors the order of the FileCheck directives
// that follow, so it must not be rearranged.
void __attribute__((noinline))
test_convert() {
  res = _mm_cvt_pi2ps(m1, ms[1]);
  res64 = _mm_cvt_ps2pi(m1);
  res = _mm_cvt_si2ss(m1, i);
  i = _mm_cvt_ss2si(m1);
  res = _mm_cvtpi16_ps(ms[0]);
  res = _mm_cvtpi32_ps(m1, ms[1]);
  res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
  res = _mm_cvtpi8_ps(ms[0]);
  res64 = _mm_cvtps_pi16(m1);
  res64 = _mm_cvtps_pi32(m1);
  res64 = _mm_cvtps_pi8(m1);
  res = _mm_cvtpu16_ps(ms[0]);
  res = _mm_cvtpu8_ps(ms[0]);
  res = _mm_cvtsi32_ss(m1, i);
  res = _mm_cvtsi64_ss(m1, i64);
  fs[0] = _mm_cvtss_f32(m1);
  i = _mm_cvtss_si32(m1);
  i64 = _mm_cvtss_si64(m1);
  res64 = _mm_cvtt_ps2pi(m1);
  i = _mm_cvtt_ss2si(m1);
  res64 = _mm_cvttps_pi32(m1);
  i = _mm_cvttss_si32(m1);
  i64 = _mm_cvttss_si64(m1);
}

// CHECK-LABEL: @test_convert

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_pi2ps
// CHECK: call <4 x float> @_mm_cvtpi32_ps

// CHECK-LABEL: define available_externally i64 @_mm_cvt_ps2pi
// CHECK: call i64 @_mm_cvtps_pi32

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_si2ss
// CHECK: call <4 x float> @_mm_cvtsi32_ss

// CHECK-LABEL: define available_externally signext i32 @_mm_cvt_ss2si
// CHECK: call signext i32 @_mm_cvtss_si32

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi16_ps
// CHECK: call <4 x i32> @vec_vupklsh(short vector[8])
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32_ps
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32x2_ps
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi8_ps
// CHECK: call <8 x i16> @vec_vupkhsb(signed char vector[16])
// CHECK: call <4 x i32> @vec_vupkhsh(short vector[8])
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi16
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi32
// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi8
// CHECK: call <4 x float> @vec_rint(float vector[4])
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
// CHECK: call <16 x i8> @vec_pack(short vector[8], short vector[8])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu16_ps
// CHECK-LE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
// CHECK-BE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu8_ps
// CHECK-BE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK-BE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK-LE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
// CHECK-LE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
// CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi32_ss
// CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to float
// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi64_ss
// CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to float
// CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally float @_mm_cvtss_f32
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally signext i32 @_mm_cvtss_si32
// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 0
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 1
// CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 2

// CHECK-LABEL: define available_externally i64 @_mm_cvtss_si64
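// CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"
// CHECK-P10-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"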
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 0
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 1
// CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 2

// CHECK-LABEL: define available_externally i64 @_mm_cvtt_ps2pi
// CHECK: call i64 @_mm_cvttps_pi32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally signext i32 @_mm_cvtt_ss2si
// CHECK: call signext i32 @_mm_cvttss_si32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally i64 @_mm_cvttps_pi32
// CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally signext i32 @_mm_cvttss_si32
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i32

// CHECK-LABEL: define available_externally i64 @_mm_cvttss_si64
// CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i64

// Driver for the single-precision divide intrinsics: packed (_ps) first, then
// scalar (_ss), both dividing m1 by m2 and storing into res.
void __attribute__((noinline))
test_div() {
  res = _mm_div_ps(m1, m2);
  res = _mm_div_ss(m1, m2);
}

// CHECK-LABEL: @test_div

// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ps
// CHECK: fdiv <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_div_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fdiv <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for 16-bit element extraction from a 64-bit value, using both the
// _mm_extract_pi16 name and its _m_pextrw alias. The element selector is the
// global i2 rather than a literal.
void __attribute__((noinline))
test_extract() {
  i = _mm_extract_pi16(ms[0], i2);
  i = _m_pextrw(ms[0], i2);
}

// CHECK-LABEL: @test_extract

// CHECK-LABEL: define available_externally signext i32 @_mm_extract_pi16
// CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK-BE: sub i32 3, %{{[0-9a-zA-Z_.]+}}
// CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 16
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %[[MUL]] to i64
// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = lshr i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64 %[[SHR]], 65535
// CHECK: trunc i64 %[[AND]] to i32

// CHECK-LABEL: define available_externally signext i32 @_m_pextrw
// CHECK: call signext i32 @_mm_extract_pi16

// Driver for 16-bit element insertion into a 64-bit value, using both the
// _mm_insert_pi16 name and its _m_pinsrw alias. The value to insert is the
// global i and the element selector is the global i2.
void __attribute__((noinline))
test_insert() {
  res64 = _mm_insert_pi16(ms[0], i, i2);
  res64 = _m_pinsrw(ms[0], i, i2);
}

// CHECK-LABEL: @test_insert

// CHECK-LABEL: define available_externally i64 @_mm_insert_pi16
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK: mul nsw i32 %[[AND]], 16
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: shl i64 %[[EXT]], %[[EXT2]]
// CHECK: %[[EXT3:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
// CHECK: shl i64 65535, %[[EXT3]]
// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
// CHECK: or i64 %[[AND2]], %[[AND3]]

// CHECK-LABEL: define available_externally i64 @_m_pinsrw
// CHECK: call i64 @_mm_insert_pi16

// Driver for the load intrinsics. The float loads read from the global array
// fs; the half-vector loads (_mm_loadh_pi / _mm_loadl_pi) combine m1 with the
// 64-bit value at &ms[0]. Every result is stored into res.
// The call order mirrors the order of the FileCheck directives that follow.
void __attribute__((noinline))
test_load() {
  res = _mm_load_ps(fs);
  res = _mm_load_ps1(fs);
  res = _mm_load_ss(fs);
  res = _mm_load1_ps(fs);
  res = _mm_loadh_pi(m1, &ms[0]);
  res = _mm_loadl_pi(m1, &ms[0]);
  res = _mm_loadr_ps(fs);
  res = _mm_loadu_ps(fs);
}

// CHECK-LABEL: @test_load

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps
// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps1
// CHECK: call <4 x float> @_mm_load1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_load_ss
// CHECK: call <4 x float> @_mm_set_ss

// CHECK-LABEL: define available_externally <4 x float> @_mm_load1_ps
// CHECK: call <4 x float> @_mm_set1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadh_pi
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[VAL]], i32 1

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadl_pi
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[EXT]], i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadr_ps
// CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_loadu_ps
// CHECK: call <4 x float> @vec_vsx_ld(int, float const*)

// Driver for the bitwise packed-float wrappers (or / and / andnot / xor).
// Each is invoked once on `m1` and `m2`, with the result stored to `res`; the
// directives after this function expect one emitted definition per call, in
// this order.
void __attribute__((noinline))
test_logic() {
  res = _mm_or_ps(m1, m2);
  res = _mm_and_ps(m1, m2);
  res = _mm_andnot_ps(m1, m2);
  res = _mm_xor_ps(m1, m2);
}

// CHECK-LABEL: @test_logic

// CHECK-LABEL: define available_externally <4 x float> @_mm_or_ps
// CHECK: call <4 x float> @vec_or(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_and_ps
// CHECK: call <4 x float> @vec_and(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_andnot_ps
// CHECK: call <4 x float> @vec_andc(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_xor_ps
// CHECK: call <4 x float> @vec_xor(float vector[4], float vector[4])

// Driver for the maximum wrappers: two float forms writing to `res` and two
// 64-bit integer forms (operating on ms[0] and ms[1]) writing to `res64`.
// The directives below expect the definitions in the order called here.
void __attribute__((noinline))
test_max() {
  res = _mm_max_ps(m1, m2);
  res = _mm_max_ss(m1, m2);
  res64 = _mm_max_pi16(ms[0], ms[1]);
  res64 = _mm_max_pu8(ms[0], ms[1]);
}

// CHECK-LABEL: @test_max

// CHECK-LABEL: define available_externally <4 x float> @_mm_max_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_max_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_max(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally i64 @_mm_max_pi16
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
// CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
// CHECK: extractelement <2 x i64> %[[CAST]], i32 0

// CHECK-LABEL: define available_externally i64 @_mm_max_pu8
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
// CHECK: extractelement <2 x i64> %[[CAST]], i32 0

// Driver for the alternate-name maximum wrappers. The directives that follow
// only verify that each one forwards to its _mm_max_* counterpart.
void __attribute__((noinline))
test_alt_name_max() {
  res64 = _m_pmaxsw(ms[0], ms[1]);
  res64 = _m_pmaxub(ms[0], ms[1]);
}

// CHECK-LABEL: @test_alt_name_max

// CHECK-LABEL: define available_externally i64 @_m_pmaxsw
// CHECK: call i64 @_mm_max_pi16

// CHECK-LABEL: define available_externally i64 @_m_pmaxub
// CHECK: call i64 @_mm_max_pu8

// Driver for the minimum wrappers: two float forms writing to `res` and two
// 64-bit integer forms (operating on ms[0] and ms[1]) writing to `res64`.
// The directives below expect the definitions in the order called here.
void __attribute__((noinline))
test_min() {
  res = _mm_min_ps(m1, m2);
  res = _mm_min_ss(m1, m2);
  res64 = _mm_min_pi16(ms[0], ms[1]);
  res64 = _mm_min_pu8(ms[0], ms[1]);
}

// CHECK-LABEL: @test_min

// CHECK-LABEL: define available_externally <4 x float> @_mm_min_ps
// CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_min_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
// CHECK: call <4 x float> @vec_min(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally i64 @_mm_min_pi16
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
// CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
// CHECK: extractelement <2 x i64> %[[CAST]], i32 0

// CHECK-LABEL: define available_externally i64 @_mm_min_pu8
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
// CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
// CHECK: extractelement <2 x i64> %[[CAST]], i32 0

// Driver for the alternate-name minimum wrappers. The directives that follow
// only verify that each one forwards to its _mm_min_* counterpart.
void __attribute__((noinline))
test_alt_name_min() {
  res64 = _m_pminsw(ms[0], ms[1]);
  res64 = _m_pminub(ms[0], ms[1]);
}

// CHECK-LABEL: @test_alt_name_min

// CHECK-LABEL: define available_externally i64 @_m_pminsw
// CHECK: call i64 @_mm_min_pi16

// CHECK-LABEL: define available_externally i64 @_m_pminub
// CHECK: call i64 @_mm_min_pu8

// Driver for the move / mask wrappers. The call order matches the order of
// the label directives that follow.
void __attribute__((noinline))
test_move() {
  // Only call in this function with no result to store: it writes through
  // the pointer argument, here the address of `res64` viewed as char *.
  _mm_maskmove_si64(ms[0], ms[1], (char *)&res64);
  res = _mm_move_ss(m1, m2);
  res = _mm_movehl_ps(m1, m2);
  res = _mm_movelh_ps(m1, m2);
  // The two movemask forms produce an int, stored to `i`.
  i = _mm_movemask_pi8(ms[0]);
  i = _mm_movemask_ps(m1);
}

// CHECK-LABEL: @test_move

// CHECK-LABEL: define available_externally void @_mm_maskmove_si64
// CHECK: store i64 -9187201950435737472, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64
// CHECK: call i64 @_mm_cmpeq_pi8(i64 noundef %[[AND]], i64 noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64
// CHECK: or i64 %[[AND2]], %[[AND3]]

// CHECK-LABEL: define available_externally <4 x float> @_mm_move_ss
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally <4 x float> @_mm_movehl_ps
// CHECK: call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])

// CHECK-LABEL: define available_externally <4 x float> @_mm_movelh_ps
// CHECK: call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])

// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pi8
// CHECK-LE: store i64 2269495618449464, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK-BE: store i64 4048780183313844224, ptr %{{[0-9a-zA-Z_.]+}}, align 8
// CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call i64 @llvm.ppc.bpermd
// CHECK: trunc i64 %[[CALL]] to i32

// CHECK-LABEL: define available_externally signext i32 @_mm_movemask_ps
// CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
// CHECK-LE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK-LE: trunc i64 %[[EXT]] to i32
// CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
// CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK-BE: trunc i64 %[[EXT]] to i32
// CHECK-P10-LE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK-P10-BE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}})

// Driver for the alternate-name move wrappers. The directives that follow
// only verify that each one forwards to _mm_movemask_pi8 / _mm_maskmove_si64
// respectively.
void __attribute__((noinline))
test_alt_name_move() {
  i = _m_pmovmskb(ms[0]);
  _m_maskmovq(ms[0], ms[1], (char *)&res64);
}

// CHECK-LABEL: @test_alt_name_move

// CHECK-LABEL: define available_externally signext i32 @_m_pmovmskb
// CHECK: call signext i32 @_mm_movemask_pi8

// CHECK-LABEL: define available_externally void @_m_maskmovq
// CHECK: call void @_mm_maskmove_si64

// Driver for the multiply wrappers: packed and scalar float multiplies
// (results to `res`), the unsigned 16-bit high multiply, and its alternate
// name _m_pmulhuw (results to `res64`).
void __attribute__((noinline))
test_mul() {
  res = _mm_mul_ps(m1, m2);
  res = _mm_mul_ss(m1, m2);
  res64 = _mm_mulhi_pu16(ms[0], ms[1]);
  res64 = _m_pmulhuw(ms[0], ms[1]);
}

// CHECK-LABEL: @test_mul

// CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ps
// CHECK: fmul <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fmul <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// CHECK-LABEL: define available_externally i64 @_mm_mulhi_pu16
// CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally i64 @_m_pmulhuw
// CHECK: call i64 @_mm_mulhi_pu16

// Driver for _mm_prefetch, called once on `ms` with the _MM_HINT_NTA hint.
// The directive after this function expects a single llvm.prefetch call in
// the emitted wrapper.
void __attribute__((noinline))
test_prefetch() {
  _mm_prefetch(ms, _MM_HINT_NTA);
}

// CHECK-LABEL: @test_prefetch

// CHECK-LABEL: define available_externally void @_mm_prefetch
// CHECK: call void @llvm.prefetch.p0(ptr %{{[0-9a-zA-Z_.]+}}, i32 0, i32 3, i32 1)

// Driver for the reciprocal wrappers (packed and scalar), both applied to
// `m1` with the result stored to `res`.
void __attribute__((noinline))
test_rcp() {
  res = _mm_rcp_ps(m1);
  res = _mm_rcp_ss(m1);
}

// CHECK-LABEL: @test_rcp

// CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ps
// CHECK: call <4 x float> @vec_re(float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)
// CHECK: call <4 x float> @_mm_rcp_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for the reciprocal-square-root wrappers (packed and scalar), both
// applied to `m1` with the result stored to `res`.
void __attribute__((noinline))
test_rsqrt() {
  res = _mm_rsqrt_ps(m1);
  res = _mm_rsqrt_ss(m1);
}

// CHECK-LABEL: @test_rsqrt

// CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ps
// CHECK: call <4 x float> @vec_rsqrte(float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_rsqrte(float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for _mm_sad_pu8 and its alternate name _m_psadbw, both called on
// ms[0] and ms[1] with the result stored to `res64`. The directives below
// verify the full lowering of the former and only the forwarding call of
// the latter.
void __attribute__((noinline))
test_sad() {
  res64 = _mm_sad_pu8(ms[0], ms[1]);
  res64 = _m_psadbw(ms[0], ms[1]);
}

// CHECK-LABEL: @test_sad

// CHECK-LABEL: define available_externally i64 @_mm_sad_pu8
// CHECK: call void @llvm.memset.p0.i64(ptr align 8 %{{[0-9a-zA-Z_.]+}}, i8 0, i64 8, i1 false)
// CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
// CHECK: call <4 x i32> @vec_sums(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
// CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 3
// CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[EXT]] to i16
// CHECK: %[[GEP:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
// CHECK: store i16 %[[TRUNC]], ptr %[[GEP]], align 8

// CHECK-LABEL: define available_externally i64 @_m_psadbw
// CHECK: call i64 @_mm_sad_pu8

// Driver for the vector-construction wrappers. Scalars are read from `fs`;
// every result is stored to `res`. The call order matches the order of the
// label directives that follow.
void __attribute__((noinline))
test_set() {
  res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]);
  res = _mm_set_ps1(fs[0]);
  res = _mm_set_ss(fs[0]);
  res = _mm_set1_ps(fs[0]);
  res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]);
}

// CHECK-LABEL: @test_set

// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps1
// CHECK: call <4 x float> @_mm_set1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ss
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float 0.000000e+00, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float 0.000000e+00, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float 0.000000e+00, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// CHECK-LABEL: define available_externally <4 x float> @_mm_set1_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// CHECK-LABEL: define available_externally <4 x float> @_mm_setr_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// Driver for _mm_setzero_ps; the directive below expects the wrapper to
// store a zeroinitializer <4 x float>.
void __attribute__((noinline))
test_setzero() {
  res = _mm_setzero_ps();
}

// CHECK-LABEL: @test_setzero

// CHECK-LABEL: define available_externally <4 x float> @_mm_setzero_ps
// CHECK: store <4 x float> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16

// Driver for _mm_sfence; the directive below expects the wrapper to contain
// a `fence release` instruction.
void __attribute__((noinline))
test_sfence() {
  _mm_sfence();
}

// CHECK-LABEL: @test_sfence

// CHECK-LABEL: define available_externally void @_mm_sfence
// CHECK: fence release

// Driver for the shuffle wrappers and the alternate name _m_pshufw. The
// selector passed to each is the variable `i` rather than a literal, and the
// directives below match the and/ashr sequence that decodes it inside the
// wrappers.
void __attribute__((noinline))
test_shuffle() {
  res64 = _mm_shuffle_pi16(ms[0], i);
  res = _mm_shuffle_ps(m1, m2, i);
  res64 = _m_pshufw(ms[0], i);
}

// CHECK-LABEL: @test_shuffle

// CHECK-LABEL: define available_externally i64 @_mm_shuffle_pi16
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK: sext i32 %[[AND]] to i64
// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
// CHECK: sext i32 %[[AND2]] to i64
// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
// CHECK: sext i32 %[[AND3]] to i64
// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
// CHECK: sext i32 %[[AND4]] to i64
// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
// CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
// CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
// CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_splats(unsigned long long)
// CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
// CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0

// CHECK-LABEL: define available_externally <4 x float> @_mm_shuffle_ps
// CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
// CHECK: sext i32 %[[AND]] to i64
// CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
// CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
// CHECK: sext i32 %[[AND2]] to i64
// CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
// CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
// CHECK: sext i32 %[[AND3]] to i64
// CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
// CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
// CHECK: sext i32 %[[AND4]] to i64
// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
// CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
// CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
// CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3
// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])

// CHECK-LABEL: define available_externally i64 @_m_pshufw
// CHECK: call i64 @_mm_shuffle_pi16

// Driver for the square-root wrappers (packed and scalar), both applied to
// `m1` with the result stored to `res`.
void __attribute__((noinline))
test_sqrt() {
  res = _mm_sqrt_ps(m1);
  res = _mm_sqrt_ss(m1);
}

// CHECK-LABEL: @test_sqrt

// CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ps
// CHECK: call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_sqrt(float vector[4])
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for the store wrappers. All of them write `m1` out: the float forms
// target `fs`, the two half-vector forms target `ms`. The call order matches
// the order of the label directives that follow.
void __attribute__((noinline))
test_store() {
  _mm_store_ps(fs, m1);
  _mm_store_ps1(fs, m1);
  _mm_store_ss(fs, m1);
  _mm_store1_ps(fs, m1);
  _mm_storeh_pi(ms, m1);
  _mm_storel_pi(ms, m1);
  _mm_storer_ps(fs, m1);
}

// CHECK-LABEL: @test_store

// CHECK-LABEL: define available_externally void @_mm_store_ps
// CHECK: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally void @_mm_store_ps1
// CHECK: call void @_mm_store1_ps

// CHECK-LABEL: define available_externally void @_mm_store_ss
// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: store float %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 4

// CHECK-LABEL: define available_externally void @_mm_store1_ps
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call void @_mm_store_ps

// CHECK-LABEL: define available_externally void @_mm_storeh_pi
// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8

// CHECK-LABEL: define available_externally void @_mm_storel_pi
// CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8

// CHECK-LABEL: define available_externally void @_mm_storer_ps
// CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
// CHECK: call void @_mm_store_ps

// Driver for the streaming-store wrappers: a 64-bit store of ms[0] to
// `res64` and a packed-float store of `m1` to the start of `fs`. The
// directives below expect a `dcbtstt` inline-asm call inside each wrapper.
void __attribute__((noinline))
test_stream() {
  _mm_stream_pi(&res64, ms[0]);
  _mm_stream_ps(&fs[0], m1);
}

// CHECK-LABEL: @test_stream

// CHECK-LABEL: define available_externally void @_mm_stream_pi
// CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally void @_mm_stream_ps
// CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
// CHECK: call void @_mm_store_ps

// Driver for the subtract wrappers (packed and scalar), both applied to
// `m1` and `m2` with the result stored to `res`.
void __attribute__((noinline))
test_sub() {
  res = _mm_sub_ps(m1, m2);
  res = _mm_sub_ss(m1, m2);
}

// CHECK-LABEL: @test_sub

// CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ps
// CHECK: fsub <4 x float>

// CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ss
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
// CHECK: fsub <4 x float>
// CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

// Driver for the _MM_TRANSPOSE4_PS macro. Unlike the other drivers, the
// directives that follow have no wrapper label of their own: they match the
// merge calls directly after the test_transpose label.
void __attribute__((noinline))
test_transpose() {
  // m3 and m4 are declared without initialisers; the directives below only
  // inspect which vec_* calls are emitted, not any data values.
  __m128 m3, m4;
  _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
}

// CHECK-LABEL: @test_transpose

// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
// CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
// CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
// CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
// CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])

// Driver for the six _mm_ucomi*_ss scalar comparison wrappers. Each compares
// `m1` with `m2` and stores the int result to `i`. The directives below
// expect one fcmp per wrapper, with predicates oeq, oge, ogt, ole, olt and
// une, in the order called here.
void __attribute__((noinline))
test_ucomi() {
  i = _mm_ucomieq_ss(m1, m2);
  i = _mm_ucomige_ss(m1, m2);
  i = _mm_ucomigt_ss(m1, m2);
  i = _mm_ucomile_ss(m1, m2);
  i = _mm_ucomilt_ss(m1, m2);
  i = _mm_ucomineq_ss(m1, m2);
}

// CHECK-LABEL: @test_ucomi

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp oeq float %[[VAL1]], %[[VAL2]]

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp oge float %[[VAL1]], %[[VAL2]]

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp ogt float %[[VAL1]], %[[VAL2]]

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp ole float %[[VAL1]], %[[VAL2]]

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp olt float %[[VAL1]], %[[VAL2]]

// CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_ss
// CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: fcmp une float %[[VAL1]], %[[VAL2]]

// Driver for _mm_undefined_ps; the directives below expect the wrapper to
// allocate a <4 x float> slot and load from it.
void __attribute__((noinline))
test_undefined() {
  res = _mm_undefined_ps();
}

// CHECK-LABEL: @test_undefined

// CHECK-LABEL: define available_externally <4 x float> @_mm_undefined_ps
// CHECK: alloca <4 x float>, align 16
// CHECK: load <4 x float>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
// CHECK: load <4 x float>, ptr %[[ADDR]], align 16

// Driver for the interleave wrappers. Per the directives that follow,
// _mm_unpackhi_ps is expected to call vec_vmrglw and _mm_unpacklo_ps to call
// vec_vmrghw.
void __attribute__((noinline))
test_unpack() {
  res = _mm_unpackhi_ps(m1, m2);
  res = _mm_unpacklo_ps(m1, m2);
}

// CHECK-LABEL: @test_unpack

// CHECK-LABEL: define available_externally <4 x float> @_mm_unpackhi_ps
// CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])

// CHECK-LABEL: define available_externally <4 x float> @_mm_unpacklo_ps
// CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])