llvm/llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; VNNI FP16

define <16 x float> @test_mm512_dpph_ps(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B) {
; CHECK-LABEL: test_mm512_dpph_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdpphps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x52,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_dpph_ps(<16 x float> %__W, i16 zeroext %__U, <32 x half> %__A, <32 x half> %__B) {
; X86-LABEL: test_mm512_mask_dpph_ps:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdpphps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x52,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpph_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdpphps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x52,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dph = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x float> %dph, <16 x float> %__W
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_dpph_ps(i16 zeroext %__U, <16 x float> %__W, <32 x half> %__A, <32 x half> %__B) {
; X86-LABEL: test_mm512_maskz_dpph_ps:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdpphps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x52,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpph_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdpphps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x52,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dph = tail call <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float> %__W, <32 x half> %__A, <32 x half> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x float> %dph, <16 x float> zeroinitializer
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx10.vdpphps.512(<16 x float>, <32 x half>, <32 x half>)

; VNNI INT8

define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpbssd_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpbssd (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x77,0x48,0x50,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpbssd_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpbssd (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x77,0x48,0x50,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpbssds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbssds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x51,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpbssds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbssds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x51,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpbssd_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x77,0xc9,0x50,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpbssd_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x77,0xc9,0x50,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32>, <16 x i32>, <16 x i32>)

define <16 x i32> @test_mm512_dpbsud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpbsud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpbsud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x50,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpbsud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpbsud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x50,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpbsuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpbsuds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x51,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpbsuds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x51,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbsuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpbsud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpbsud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x50,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpbsud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x50,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpbsud.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpbsuds.512(<16 x i32>, <16 x i32>, <16 x i32>)

define <16 x i32> @test_mm512_dpbuud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpbuud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpbuud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x50,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpbuud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpbuud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0x50,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpbuuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpbuuds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x51,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpbuuds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0x51,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbuuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpbuud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpbuud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpbuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x50,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpbuud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpbuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0x50,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpbuud.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpbuuds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; VNNI INT16

define <16 x i32> @test_mm512_dpwsud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpwsud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpwsud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0xd2,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpwsud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpwsud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0xd2,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpwsuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpwsuds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0xd3,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpwsuds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwsuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0xd3,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpwsud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpwsud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0xd2,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpwsud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwsud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0xd2,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32>, <16 x i32>, <16 x i32>)

define <16 x i32> @test_mm512_dpwusd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpwusd_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpwusd (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xd2,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpwusd_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpwusd (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xd2,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpwusds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpwusds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwusds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0xd3,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpwusds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwusds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0xd3,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpwusd_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpwusd_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xd2,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpwusd_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xd2,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32>, <16 x i32>, <16 x i32>)

define <16 x i32> @test_mm512_dpwuud_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr %pB) {
; X86-LABEL: test_mm512_dpwuud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpdpwuud (%eax), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0xd2,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_dpwuud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpdpwuud (%rdi), %zmm1, %zmm0 # encoding: [0x62,0xf2,0x74,0x48,0xd2,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %__B = load <16 x i32>, ptr %pB
  %res = tail call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_mask_dpwuuds_epi32(<16 x i32> %__W, i16 zeroext %__U, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_mask_dpwuuds_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0xd3,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_dpwuuds_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwuuds %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x74,0x49,0xd3,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> %__W
  ret <16 x i32> %res
}

define <16 x i32> @test_mm512_maskz_dpwuud_epi32(i16 zeroext %__U, <16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B) {
; X86-LABEL: test_mm512_maskz_dpwuud_epi32:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpdpwuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0xd2,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_dpwuud_epi32:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpdpwuud %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x74,0xc9,0xd2,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %dpi = tail call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %__W, <16 x i32> %__A, <16 x i32> %__B)
  %bst = bitcast i16 %__U to <16 x i1>
  %res = select <16 x i1> %bst, <16 x i32> %dpi, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32>, <16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; VMPSADBW

define { <32 x i16>, <32 x i16>, <32 x i16> } @test_mm512_mask_mpsadbw(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_mm512_mask_mpsadbw:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02]
; X86-NEXT:    vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03]
; X86-NEXT:    vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04]
; X86-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X86-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_mpsadbw:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vmpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7e,0x48,0x42,0xd9,0x02]
; X64-NEXT:    vmpsadbw $3, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7e,0x49,0x42,0xe1,0x03]
; X64-NEXT:    vmpsadbw $4, %zmm1, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf3,0x7e,0xc9,0x42,0xd1,0x04]
; X64-NEXT:    vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
; X64-NEXT:    vmovdqa64 %zmm4, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcc]
; X64-NEXT:    retq # encoding: [0xc3]
  %msk = bitcast i32 %x4 to <32 x i1>
  %rs1 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 2)
  %ad2 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 3)
  %rs2 = select <32 x i1> %msk, <32 x i16> %ad2, <32 x i16> %x3
  %ad3 = call <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i8 4)
  %rs3 = select <32 x i1> %msk, <32 x i16> %ad3, <32 x i16> zeroinitializer
  %rs4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } undef, <32 x i16> %rs1, 0
  %rs5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs4, <32 x i16> %rs2, 1
  %rs6 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %rs5, <32 x i16> %rs3, 2
  ret { <32 x i16>, <32 x i16>, <32 x i16> } %rs6
}

declare <32 x i16> @llvm.x86.avx10.vmpsadbw.512(<64 x i8>, <64 x i8>, i8)