llvm/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c


;
; Signed Saturation
;

define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epi16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epi8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epi8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

;
; Unsigned Saturation
;

define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_adds_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %sub
}

define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, ptr %ptr_b, <8 x i16> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> %passThru
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, ptr %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i16>, ptr %ptr_b
  %sub = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
  %bc = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %bc, <8 x i16> %sub, <8 x i16> zeroinitializer
  ret <8 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_subs_epu16_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>)

define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu16_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %sub
}

define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, ptr %ptr_b, <16 x i16> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> %passThru
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i16>, ptr %ptr_b
  %sub = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a, <16 x i16> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i16> %sub, <16 x i16> zeroinitializer
  ret <16 x i16> %res
}

define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_adds_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}
declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_adds_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)

define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %sub
}

define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, ptr %ptr_b, <16 x i8> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> %passThru
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, ptr %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i8>, ptr %ptr_b
  %sub = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
  %bc = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %bc, <16 x i8> %sub, <16 x i8> zeroinitializer
  ret <16 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: test_mask_subs_epu8_rr_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}
declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)

define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, ptr %ptr_b) {
; CHECK-LABEL: test_mask_subs_epu8_rm_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %sub
}

define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, ptr %ptr_b, <32 x i8> %passThru, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> %passThru
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, ptr %ptr_b, i32 %mask) {
; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <32 x i8>, ptr %ptr_b
  %sub = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i8> %sub, <32 x i8> zeroinitializer
  ret <32 x i8> %res
}