; llvm/llvm/test/CodeGen/X86/popcnt.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-BASE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X86-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X64-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefixes=X64,X64-NDD
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3

; Population count of an i8 through the llvm.ctpop.i8 intrinsic.
; Without the popcnt feature the checks expect a branch-free sequence on the
; zero-extended byte: multiply by 0x8040201, shift right by 3, mask with
; 0x11111111, multiply by 0x11111111 and shift right by 28. With +popcnt the
; checks expect a zero-extend followed by a single 32-bit popcntl.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
; X86-NEXT:    shrl $3, %eax
; X86-NEXT:    andl $286331153, %eax # imm = 0x11111111
; X86-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
; X86-NEXT:    shrl $28, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: cnt8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
; X64-NEXT:    shrl $3, %eax
; X64-NEXT:    andl $286331153, %eax # imm = 0x11111111
; X64-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
; X64-NEXT:    shrl $28, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt8:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl %eax, %eax
; X86-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt8:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    movzbl %dil, %eax
; X64-POPCNT-NEXT:    popcntl %eax, %eax
; X64-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
; X64-POPCNT-NEXT:    retq
  %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
  ret i8 %cnt
}

; Population count of an i16 through the llvm.ctpop.i16 intrinsic.
; Without popcnt the checks expect the shift/mask/add bit-counting sequence
; using the 16-bit masks 0x5555, 0x3333 and 0xF0F, finished by adding the two
; result bytes and zero-extending the low byte. The +ndd configuration expects
; the same steps written with 16-bit instructions, several of which name a
; separate destination register. With +popcnt the value is zero-extended and
; counted with one popcntl.
define i16 @cnt16(i16 %x) nounwind readnone {
; X86-LABEL: cnt16:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $13107, %eax # imm = 0x3333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: cnt16:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $21845, %eax # imm = 0x5555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl $13107, %edi # imm = 0x3333
; X64-BASE-NEXT:    addl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $3855, %eax # imm = 0xF0F
; X64-BASE-NEXT:    movl %eax, %ecx
; X64-BASE-NEXT:    shrl $8, %ecx
; X64-BASE-NEXT:    addl %eax, %ecx
; X64-BASE-NEXT:    movzbl %cl, %eax
; X64-BASE-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt16:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl %eax, %eax
; X86-POPCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt16:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    movzwl %di, %eax
; X64-POPCNT-NEXT:    popcntl %eax, %eax
; X64-POPCNT-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt16:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrw %di, %ax
; X64-NDD-NEXT:    andw $21845, %ax # imm = 0x5555
; X64-NDD-NEXT:    subw %ax, %di, %ax
; X64-NDD-NEXT:    andw $13107, %ax, %cx # imm = 0x3333
; X64-NDD-NEXT:    shrw $2, %ax
; X64-NDD-NEXT:    andw $13107, %ax # imm = 0x3333
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    shrw $4, %ax, %cx
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    andw $3855, %ax # imm = 0xF0F
; X64-NDD-NEXT:    movzbl %ah, %ecx
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    movzbl %al, %eax
; X64-NDD-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NDD-NEXT:    retq
  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
  ret i16 %cnt
}

; Population count of an i32 through the llvm.ctpop.i32 intrinsic.
; Without popcnt the checks expect the mask/add reduction (masks 0x55555555,
; 0x33333333, 0xF0F0F0F) finished by a multiply with 0x1010101 and a shift
; right by 24. With +popcnt the checks expect a single popcntl, reading the
; stack slot on i686 and %edi on x86-64.
define i32 @cnt32(i32 %x) nounwind readnone {
; X86-LABEL: cnt32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT:    shrl $24, %eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: cnt32:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-BASE-NEXT:    addl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-BASE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-BASE-NEXT:    shrl $24, %eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt32:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt32:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntl %edi, %eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt32:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrl %edi, %eax
; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-NDD-NEXT:    subl %eax, %edi
; X64-NDD-NEXT:    andl $858993459, %edi, %eax # imm = 0x33333333
; X64-NDD-NEXT:    shrl $2, %edi
; X64-NDD-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-NDD-NEXT:    addl %edi, %eax
; X64-NDD-NEXT:    shrl $4, %eax, %ecx
; X64-NDD-NEXT:    addl %ecx, %eax
; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-NDD-NEXT:    shrl $24, %eax
; X64-NDD-NEXT:    retq
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; Population count of an i64 through the llvm.ctpop.i64 intrinsic.
; Expected lowering per configuration, as pinned by the checks below:
;   - i686 without SSE - each 32-bit half is counted with the scalar
;     mask/add/multiply sequence, the two counts are added and %edx is zeroed.
;   - x86-64 baseline and +ndd - one 64-bit mask/add/multiply sequence ending
;     in a shift right by 56.
;   - +popcnt - two popcntl plus an add on i686, a single popcntq on x86-64.
;   - i686 with sse2 - byte-wise vector shift/mask/add steps whose per-byte
;     counts are summed with psadbw against a zeroed register.
;   - i686 with ssse3 - pshufb lookups into a 16-entry per-nibble count table
;     for the low and high nibbles, summed with psadbw.
define i64 @cnt64(i64 %x) nounwind readnone {
; X86-NOSSE-LABEL: cnt64:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    shrl %edx
; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edx, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %ecx
; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edx, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    shrl $4, %edx
; X86-NOSSE-NEXT:    addl %ecx, %edx
; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edx, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %ecx
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    shrl %edx
; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %eax
; X86-NOSSE-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    shrl $4, %edx
; X86-NOSSE-NEXT:    addl %eax, %edx
; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %eax
; X86-NOSSE-NEXT:    addl %ecx, %eax
; X86-NOSSE-NEXT:    xorl %edx, %edx
; X86-NOSSE-NEXT:    retl
;
; X64-BASE-LABEL: cnt64:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    subq %rcx, %rdi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rdi, %rcx
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    movq %rax, %rcx
; X64-BASE-NEXT:    shrq $4, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rcx, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %rdx, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt64:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    addl %ecx, %eax
; X86-POPCNT-NEXT:    xorl %edx, %edx
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt64:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt64:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rdi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rdi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %rcx, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $1, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    psrlw $2, %xmm0
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $4, %xmm1
; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSSE3-LABEL: cnt64:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm1
; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %eax
; X86-SSSE3-NEXT:    xorl %edx, %edx
; X86-SSSE3-NEXT:    retl
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; Population count of an i128 through the llvm.ctpop.i128 intrinsic.
; On i686 the checks expect the result to be stored through a pointer loaded
; into %eax: the count goes to offset 0, offsets 4, 8 and 12 are zeroed, and
; the function returns with `retl $4`. The count is the sum of per-piece counts:
; four 32-bit pieces for the scalar and +popcnt code, two 64-bit loads for the
; sse2 and ssse3 code.
; On x86-64 the checks expect the two 64-bit halves (%rsi and %rdi) to be
; counted separately and added into %rax, with %edx zeroed.
define i128 @cnt128(i128 %x) nounwind readnone {
; X86-NOSSE-LABEL: cnt128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebx
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %ebx, %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %ebx, %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    shrl $4, %ebx
; X86-NOSSE-NEXT:    addl %edi, %ebx
; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %ebx, %edi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %ebx, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %esi
; X86-NOSSE-NEXT:    andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %ebx, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    shrl $4, %ebx
; X86-NOSSE-NEXT:    addl %esi, %ebx
; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %ebx, %esi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %esi
; X86-NOSSE-NEXT:    addl %edi, %esi
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    shrl %edi
; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edi, %edx
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edi, %edx
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    shrl $4, %edi
; X86-NOSSE-NEXT:    addl %edx, %edi
; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edi, %edx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    shrl %edi
; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edi, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %ecx
; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edi, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    shrl $4, %edi
; X86-NOSSE-NEXT:    addl %ecx, %edi
; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %ecx
; X86-NOSSE-NEXT:    addl %edx, %ecx
; X86-NOSSE-NEXT:    addl %esi, %ecx
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    movl $0, 12(%eax)
; X86-NOSSE-NEXT:    movl $0, 8(%eax)
; X86-NOSSE-NEXT:    movl $0, 4(%eax)
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    popl %ebx
; X86-NOSSE-NEXT:    retl $4
;
; X64-BASE-LABEL: cnt128:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rsi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rsi
; X64-BASE-NEXT:    andq %rcx, %rsi
; X64-BASE-NEXT:    addq %rsi, %rax
; X64-BASE-NEXT:    movq %rax, %rdx
; X64-BASE-NEXT:    shrq $4, %rdx
; X64-BASE-NEXT:    addq %rax, %rdx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rsi, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %r9 # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %r9, %rdx
; X64-BASE-NEXT:    shrq $56, %rdx
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rdi
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movq %rcx, %rax
; X64-BASE-NEXT:    shrq $4, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    andq %rsi, %rax
; X64-BASE-NEXT:    imulq %r9, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    addq %rdx, %rax
; X64-BASE-NEXT:    xorl %edx, %edx
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt128:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    pushl %esi
; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT:    addl %ecx, %edx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
; X86-POPCNT-NEXT:    addl %ecx, %esi
; X86-POPCNT-NEXT:    addl %edx, %esi
; X86-POPCNT-NEXT:    movl %esi, (%eax)
; X86-POPCNT-NEXT:    movl $0, 12(%eax)
; X86-POPCNT-NEXT:    movl $0, 8(%eax)
; X86-POPCNT-NEXT:    movl $0, 4(%eax)
; X86-POPCNT-NEXT:    popl %esi
; X86-POPCNT-NEXT:    retl $4
;
; X64-POPCNT-LABEL: cnt128:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    addq %rcx, %rax
; X64-POPCNT-NEXT:    xorl %edx, %edx
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt128:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rsi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rsi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rsi, %rdx
; X64-NDD-NEXT:    shrq $2, %rsi
; X64-NDD-NEXT:    andq %rax, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    shrq $4, %rdx, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %r8, %rdx
; X64-NDD-NEXT:    shrq $56, %rdx
; X64-NDD-NEXT:    shrq %rdi, %r9
; X64-NDD-NEXT:    andq %r9, %rcx
; X64-NDD-NEXT:    subq %rcx, %rdi
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    andq %rsi, %rax
; X64-NDD-NEXT:    imulq %r8, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    addq %rdx, %rax
; X64-NDD-NEXT:    xorl %edx, %edx
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt128:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm0, %xmm3
; X86-SSE2-NEXT:    psrlw $2, %xmm2
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    paddb %xmm3, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    psrlw $4, %xmm4
; X86-SSE2-NEXT:    paddb %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm4
; X86-SSE2-NEXT:    movd %xmm4, %ecx
; X86-SSE2-NEXT:    movq {{.*#+}} xmm4 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    psrlw $1, %xmm5
; X86-SSE2-NEXT:    pand %xmm1, %xmm5
; X86-SSE2-NEXT:    psubb %xmm5, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $2, %xmm4
; X86-SSE2-NEXT:    pand %xmm0, %xmm4
; X86-SSE2-NEXT:    paddb %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    paddb %xmm4, %xmm0
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    addl %ecx, %edx
; X86-SSE2-NEXT:    movl %edx, (%eax)
; X86-SSE2-NEXT:    movl $0, 12(%eax)
; X86-SSE2-NEXT:    movl $0, 8(%eax)
; X86-SSE2-NEXT:    movl $0, 4(%eax)
; X86-SSE2-NEXT:    retl $4
;
; X86-SSSE3-LABEL: cnt128:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm2
; X86-SSSE3-NEXT:    pand %xmm1, %xmm2
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm2, %xmm2
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm3
; X86-SSSE3-NEXT:    movd %xmm3, %ecx
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pand %xmm1, %xmm4
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm5
; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
; X86-SSSE3-NEXT:    psrlw $4, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm0
; X86-SSSE3-NEXT:    paddb %xmm5, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %edx
; X86-SSSE3-NEXT:    addl %ecx, %edx
; X86-SSSE3-NEXT:    movl %edx, (%eax)
; X86-SSSE3-NEXT:    movl $0, 12(%eax)
; X86-SSSE3-NEXT:    movl $0, 8(%eax)
; X86-SSSE3-NEXT:    movl $0, 4(%eax)
; X86-SSSE3-NEXT:    retl $4
  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
  ret i128 %cnt
}

; i64 population count (llvm.ctpop.i64) on a function carrying the
; noimplicitfloat attribute.
; Every i686 configuration without popcnt, including the sse2 and ssse3 ones,
; shares a single set of scalar checks here: the expected code counts each
; 32-bit half with integer instructions and contains no vector instructions.
; The x86-64 and +popcnt expectations use the scalar 64-bit sequence and the
; popcnt instructions respectively.
define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat  {
; X86-LABEL: cnt64_noimplicitfloat:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    shrl %edx
; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT:    subl %edx, %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NEXT:    shrl $2, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    addl %edx, %ecx
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    shrl $4, %edx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %edx, %ecx # imm = 0x1010101
; X86-NEXT:    shrl $24, %ecx
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl %edx
; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl $4, %edx
; X86-NEXT:    addl %eax, %edx
; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
; X86-NEXT:    shrl $24, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-BASE-LABEL: cnt64_noimplicitfloat:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    subq %rcx, %rdi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rdi, %rcx
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    movq %rax, %rcx
; X64-BASE-NEXT:    shrq $4, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rcx, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %rdx, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt64_noimplicitfloat:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    addl %ecx, %eax
; X86-POPCNT-NEXT:    xorl %edx, %edx
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt64_noimplicitfloat:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rdi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rdi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %rcx, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    retq
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; i32 population count (llvm.ctpop.i32) on a function carrying the optsize
; attribute.
; In the configurations without popcnt the checks expect the 0x33333333 mask
; to be loaded into a register once and reused by both AND instructions,
; instead of appearing as an immediate operand on each of them. With +popcnt
; the checks expect a single popcntl.
define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X86-LABEL: cnt32_optsize:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT:    shrl $24, %eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: cnt32_optsize:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl $858993459, %eax # imm = 0x33333333
; X64-BASE-NEXT:    movl %edi, %ecx
; X64-BASE-NEXT:    andl %eax, %ecx
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl %eax, %edi
; X64-BASE-NEXT:    addl %ecx, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-BASE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-BASE-NEXT:    shrl $24, %eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt32_optsize:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt32_optsize:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntl %edi, %eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt32_optsize:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrl %edi, %eax
; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-NDD-NEXT:    subl %eax, %edi
; X64-NDD-NEXT:    movl $858993459, %eax # imm = 0x33333333
; X64-NDD-NEXT:    andl %eax, %edi, %ecx
; X64-NDD-NEXT:    shrl $2, %edi
; X64-NDD-NEXT:    andl %edi, %eax
; X64-NDD-NEXT:    addl %ecx, %eax
; X64-NDD-NEXT:    shrl $4, %eax, %ecx
; X64-NDD-NEXT:    addl %ecx, %eax
; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-NDD-NEXT:    shrl $24, %eax
; X64-NDD-NEXT:    retq
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X86-NOSSE-LABEL: cnt64_optsize:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebx
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %esi, %ecx
; X86-NOSSE-NEXT:    shrl %ecx
; X86-NOSSE-NEXT:    movl $1431655765, %edx # imm = 0x55555555
; X86-NOSSE-NEXT:    andl %edx, %ecx
; X86-NOSSE-NEXT:    subl %ecx, %esi
; X86-NOSSE-NEXT:    movl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    movl %esi, %edi
; X86-NOSSE-NEXT:    andl %ecx, %edi
; X86-NOSSE-NEXT:    shrl $2, %esi
; X86-NOSSE-NEXT:    andl %ecx, %esi
; X86-NOSSE-NEXT:    addl %edi, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    shrl $4, %ebx
; X86-NOSSE-NEXT:    addl %esi, %ebx
; X86-NOSSE-NEXT:    movl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    andl %edi, %ebx
; X86-NOSSE-NEXT:    imull $16843009, %ebx, %esi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %esi
; X86-NOSSE-NEXT:    movl %eax, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl %edx, %ebx
; X86-NOSSE-NEXT:    subl %ebx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    andl %ecx, %edx
; X86-NOSSE-NEXT:    shrl $2, %eax
; X86-NOSSE-NEXT:    andl %ecx, %eax
; X86-NOSSE-NEXT:    addl %edx, %eax
; X86-NOSSE-NEXT:    movl %eax, %ecx
; X86-NOSSE-NEXT:    shrl $4, %ecx
; X86-NOSSE-NEXT:    addl %eax, %ecx
; X86-NOSSE-NEXT:    andl %edi, %ecx
; X86-NOSSE-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %eax
; X86-NOSSE-NEXT:    addl %esi, %eax
; X86-NOSSE-NEXT:    xorl %edx, %edx
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    popl %ebx
; X86-NOSSE-NEXT:    retl
;
; X64-BASE-LABEL: cnt64_optsize:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    subq %rcx, %rdi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rdi, %rcx
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    movq %rax, %rcx
; X64-BASE-NEXT:    shrq $4, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rcx, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %rdx, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt64_optsize:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    addl %ecx, %eax
; X86-POPCNT-NEXT:    xorl %edx, %edx
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt64_optsize:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt64_optsize:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rdi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rdi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %rcx, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt64_optsize:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $1, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    psrlw $2, %xmm0
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $4, %xmm1
; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSSE3-LABEL: cnt64_optsize:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm1
; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %eax
; X86-SSSE3-NEXT:    xorl %edx, %edx
; X86-SSSE3-NEXT:    retl
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; i128 population count (llvm.ctpop.i128) in a function marked optsize.
; The checks below show the value being counted in pieces whose counts are
; added together: four 32-bit pieces on i686 without SSE, two 64-bit pieces
; on x86-64 and on i686 with SSE2/SSSE3. With +popcnt each piece is a single
; popcntl/popcntq. The upper part of the result is always written as zero.
; On i686 the result is stored through a pointer loaded from the stack and
; the function returns with `retl $4`.
; NOTE: the CHECK lines are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-NOSSE-LABEL: cnt128_optsize:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    pushl %ebx
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT:    movl %ebx, %ecx
; X86-NOSSE-NEXT:    shrl %ecx
; X86-NOSSE-NEXT:    movl $1431655765, %edi # imm = 0x55555555
; X86-NOSSE-NEXT:    andl %edi, %ecx
; X86-NOSSE-NEXT:    subl %ecx, %ebx
; X86-NOSSE-NEXT:    movl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    movl %ebx, %ebp
; X86-NOSSE-NEXT:    andl %ecx, %ebp
; X86-NOSSE-NEXT:    shrl $2, %ebx
; X86-NOSSE-NEXT:    andl %ecx, %ebx
; X86-NOSSE-NEXT:    addl %ebp, %ebx
; X86-NOSSE-NEXT:    movl %ebx, %ebp
; X86-NOSSE-NEXT:    shrl $4, %ebp
; X86-NOSSE-NEXT:    addl %ebx, %ebp
; X86-NOSSE-NEXT:    movl %eax, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl %edi, %ebx
; X86-NOSSE-NEXT:    subl %ebx, %eax
; X86-NOSSE-NEXT:    movl %eax, %ebx
; X86-NOSSE-NEXT:    andl %ecx, %ebx
; X86-NOSSE-NEXT:    shrl $2, %eax
; X86-NOSSE-NEXT:    andl %ecx, %eax
; X86-NOSSE-NEXT:    addl %ebx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edi
; X86-NOSSE-NEXT:    shrl $4, %edi
; X86-NOSSE-NEXT:    addl %eax, %edi
; X86-NOSSE-NEXT:    movl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    andl %ebx, %ebp
; X86-NOSSE-NEXT:    imull $16843009, %ebp, %eax # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %eax
; X86-NOSSE-NEXT:    andl %ebx, %edi
; X86-NOSSE-NEXT:    imull $16843009, %edi, %edi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edi
; X86-NOSSE-NEXT:    addl %eax, %edi
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    shrl %eax
; X86-NOSSE-NEXT:    movl $1431655765, %ebp # imm = 0x55555555
; X86-NOSSE-NEXT:    andl %ebp, %eax
; X86-NOSSE-NEXT:    subl %eax, %esi
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    andl %ecx, %eax
; X86-NOSSE-NEXT:    shrl $2, %esi
; X86-NOSSE-NEXT:    andl %ecx, %esi
; X86-NOSSE-NEXT:    addl %eax, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebp
; X86-NOSSE-NEXT:    shrl $4, %ebp
; X86-NOSSE-NEXT:    addl %esi, %ebp
; X86-NOSSE-NEXT:    movl %edx, %eax
; X86-NOSSE-NEXT:    shrl %eax
; X86-NOSSE-NEXT:    movl $1431655765, %esi # imm = 0x55555555
; X86-NOSSE-NEXT:    andl %esi, %eax
; X86-NOSSE-NEXT:    subl %eax, %edx
; X86-NOSSE-NEXT:    movl %edx, %eax
; X86-NOSSE-NEXT:    andl %ecx, %eax
; X86-NOSSE-NEXT:    shrl $2, %edx
; X86-NOSSE-NEXT:    andl %ecx, %edx
; X86-NOSSE-NEXT:    addl %eax, %edx
; X86-NOSSE-NEXT:    movl %edx, %eax
; X86-NOSSE-NEXT:    shrl $4, %eax
; X86-NOSSE-NEXT:    addl %edx, %eax
; X86-NOSSE-NEXT:    andl %ebx, %ebp
; X86-NOSSE-NEXT:    andl %ebx, %eax
; X86-NOSSE-NEXT:    imull $16843009, %ebp, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %ecx
; X86-NOSSE-NEXT:    imull $16843009, %eax, %edx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edx
; X86-NOSSE-NEXT:    addl %ecx, %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    addl %edi, %edx
; X86-NOSSE-NEXT:    xorl %ecx, %ecx
; X86-NOSSE-NEXT:    movl %ecx, 12(%eax)
; X86-NOSSE-NEXT:    movl %ecx, 8(%eax)
; X86-NOSSE-NEXT:    movl %ecx, 4(%eax)
; X86-NOSSE-NEXT:    movl %edx, (%eax)
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    popl %ebx
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl $4
;
; X64-BASE-LABEL: cnt128_optsize:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rsi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rsi
; X64-BASE-NEXT:    andq %rcx, %rsi
; X64-BASE-NEXT:    addq %rsi, %rax
; X64-BASE-NEXT:    movq %rax, %rdx
; X64-BASE-NEXT:    shrq $4, %rdx
; X64-BASE-NEXT:    addq %rax, %rdx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rsi, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %r9 # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %r9, %rdx
; X64-BASE-NEXT:    shrq $56, %rdx
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rdi
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movq %rcx, %rax
; X64-BASE-NEXT:    shrq $4, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    andq %rsi, %rax
; X64-BASE-NEXT:    imulq %r9, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    addq %rdx, %rax
; X64-BASE-NEXT:    xorl %edx, %edx
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt128_optsize:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    pushl %esi
; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT:    addl %ecx, %edx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
; X86-POPCNT-NEXT:    addl %ecx, %esi
; X86-POPCNT-NEXT:    addl %edx, %esi
; X86-POPCNT-NEXT:    xorl %ecx, %ecx
; X86-POPCNT-NEXT:    movl %ecx, 12(%eax)
; X86-POPCNT-NEXT:    movl %ecx, 8(%eax)
; X86-POPCNT-NEXT:    movl %ecx, 4(%eax)
; X86-POPCNT-NEXT:    movl %esi, (%eax)
; X86-POPCNT-NEXT:    popl %esi
; X86-POPCNT-NEXT:    retl $4
;
; X64-POPCNT-LABEL: cnt128_optsize:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    addq %rcx, %rax
; X64-POPCNT-NEXT:    xorl %edx, %edx
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt128_optsize:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rsi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rsi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rsi, %rdx
; X64-NDD-NEXT:    shrq $2, %rsi
; X64-NDD-NEXT:    andq %rax, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    shrq $4, %rdx, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %r8, %rdx
; X64-NDD-NEXT:    shrq $56, %rdx
; X64-NDD-NEXT:    shrq %rdi, %r9
; X64-NDD-NEXT:    andq %r9, %rcx
; X64-NDD-NEXT:    subq %rcx, %rdi
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    andq %rsi, %rax
; X64-NDD-NEXT:    imulq %r8, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    addq %rdx, %rax
; X64-NDD-NEXT:    xorl %edx, %edx
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt128_optsize:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm0, %xmm3
; X86-SSE2-NEXT:    psrlw $2, %xmm2
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    paddb %xmm3, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    psrlw $4, %xmm4
; X86-SSE2-NEXT:    paddb %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm4
; X86-SSE2-NEXT:    movd %xmm4, %ecx
; X86-SSE2-NEXT:    movq {{.*#+}} xmm4 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    psrlw $1, %xmm5
; X86-SSE2-NEXT:    pand %xmm1, %xmm5
; X86-SSE2-NEXT:    psubb %xmm5, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $2, %xmm4
; X86-SSE2-NEXT:    pand %xmm0, %xmm4
; X86-SSE2-NEXT:    paddb %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    paddb %xmm4, %xmm0
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    addl %ecx, %edx
; X86-SSE2-NEXT:    xorl %ecx, %ecx
; X86-SSE2-NEXT:    movl %ecx, 12(%eax)
; X86-SSE2-NEXT:    movl %ecx, 8(%eax)
; X86-SSE2-NEXT:    movl %ecx, 4(%eax)
; X86-SSE2-NEXT:    movl %edx, (%eax)
; X86-SSE2-NEXT:    retl $4
;
; X86-SSSE3-LABEL: cnt128_optsize:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm2
; X86-SSSE3-NEXT:    pand %xmm1, %xmm2
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm2, %xmm2
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm3
; X86-SSSE3-NEXT:    movd %xmm3, %ecx
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pand %xmm1, %xmm4
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm5
; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
; X86-SSSE3-NEXT:    psrlw $4, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm0
; X86-SSSE3-NEXT:    paddb %xmm5, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %edx
; X86-SSSE3-NEXT:    addl %ecx, %edx
; X86-SSSE3-NEXT:    xorl %ecx, %ecx
; X86-SSSE3-NEXT:    movl %ecx, 12(%eax)
; X86-SSSE3-NEXT:    movl %ecx, 8(%eax)
; X86-SSSE3-NEXT:    movl %ecx, 4(%eax)
; X86-SSSE3-NEXT:    movl %edx, (%eax)
; X86-SSSE3-NEXT:    retl $4
  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
  ret i128 %cnt
}

; i32 population count (llvm.ctpop.i32) in a function annotated with
; `!prof !14`. The !14 node is defined elsewhere in this file; judging by the
; "pgso" suffix it presumably drives profile-guided size optimization --
; TODO confirm against the metadata definition.
; The three i686 runs without popcnt (no SSE, SSE2, SSSE3) are all checked
; under the shared X86 prefix. With +popcnt the whole count is a single
; popcntl; the other runs check the shift/mask/multiply sequence with the
; masks as immediate operands.
; NOTE: the CHECK lines are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X86-LABEL: cnt32_pgso:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT:    shrl $24, %eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: cnt32_pgso:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-BASE-NEXT:    addl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-BASE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-BASE-NEXT:    shrl $24, %eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt32_pgso:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt32_pgso:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntl %edi, %eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt32_pgso:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrl %edi, %eax
; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
; X64-NDD-NEXT:    subl %eax, %edi
; X64-NDD-NEXT:    andl $858993459, %edi, %eax # imm = 0x33333333
; X64-NDD-NEXT:    shrl $2, %edi
; X64-NDD-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-NDD-NEXT:    addl %edi, %eax
; X64-NDD-NEXT:    shrl $4, %eax, %ecx
; X64-NDD-NEXT:    addl %ecx, %eax
; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-NDD-NEXT:    shrl $24, %eax
; X64-NDD-NEXT:    retq
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; i64 population count (llvm.ctpop.i64) in a function annotated with
; `!prof !14`. The !14 node is defined elsewhere in this file; judging by the
; "pgso" suffix it presumably drives profile-guided size optimization --
; TODO confirm against the metadata definition.
; The checks show: i686 without SSE counts the two 32-bit halves separately
; (masks as immediate operands) and adds them; x86-64 without popcnt uses
; 64-bit masks materialized with movabsq; +popcnt uses popcntl per half on
; i686 or one popcntq on x86-64; i686 with SSE2/SSSE3 counts in an xmm
; register and reduces with psadbw. On i686 the upper 32 bits of the result
; (%edx) are zeroed.
; NOTE: the CHECK lines are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X86-NOSSE-LABEL: cnt64_pgso:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    shrl %edx
; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edx, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %ecx
; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edx, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edx
; X86-NOSSE-NEXT:    shrl $4, %edx
; X86-NOSSE-NEXT:    addl %ecx, %edx
; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edx, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %ecx
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    shrl %edx
; X86-NOSSE-NEXT:    andl $1431655765, %edx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %eax
; X86-NOSSE-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edx, %eax
; X86-NOSSE-NEXT:    movl %eax, %edx
; X86-NOSSE-NEXT:    shrl $4, %edx
; X86-NOSSE-NEXT:    addl %eax, %edx
; X86-NOSSE-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edx, %eax # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %eax
; X86-NOSSE-NEXT:    addl %ecx, %eax
; X86-NOSSE-NEXT:    xorl %edx, %edx
; X86-NOSSE-NEXT:    retl
;
; X64-BASE-LABEL: cnt64_pgso:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    subq %rcx, %rdi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rdi, %rcx
; X64-BASE-NEXT:    andq %rax, %rcx
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    movq %rax, %rcx
; X64-BASE-NEXT:    shrq $4, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rcx, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %rdx, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt64_pgso:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    addl %ecx, %eax
; X86-POPCNT-NEXT:    xorl %edx, %edx
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: cnt64_pgso:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt64_pgso:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rdi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rdi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %rcx, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt64_pgso:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $1, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    psrlw $2, %xmm0
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $4, %xmm1
; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSSE3-LABEL: cnt64_pgso:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm1, %xmm2
; X86-SSSE3-NEXT:    pand %xmm0, %xmm2
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm1
; X86-SSSE3-NEXT:    pand %xmm0, %xmm1
; X86-SSSE3-NEXT:    pshufb %xmm1, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm0, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm3, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %eax
; X86-SSSE3-NEXT:    xorl %edx, %edx
; X86-SSSE3-NEXT:    retl
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; i128 population count (llvm.ctpop.i128) in a function annotated with
; `!prof !14`. The !14 node is defined elsewhere in this file; judging by the
; "pgso" suffix it presumably drives profile-guided size optimization --
; TODO confirm against the metadata definition.
; The checks show the value being counted in pieces whose counts are added:
; four 32-bit pieces on i686 without SSE (masks as immediate operands), two
; 64-bit pieces on x86-64 and on i686 with SSE2/SSSE3, and one popcntl/popcntq
; per piece with +popcnt. The upper part of the result is always written as
; zero. On i686 the result is stored through a pointer loaded from the stack
; and the function returns with `retl $4`.
; NOTE: the CHECK lines are autogenerated (see the file header); regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-NOSSE-LABEL: cnt128_pgso:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebx
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %ebx, %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %ebx, %edi
; X86-NOSSE-NEXT:    movl %edi, %ebx
; X86-NOSSE-NEXT:    shrl $4, %ebx
; X86-NOSSE-NEXT:    addl %edi, %ebx
; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %ebx, %edi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    shrl %ebx
; X86-NOSSE-NEXT:    andl $1431655765, %ebx # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %ebx, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    andl $858993459, %ebx # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %esi
; X86-NOSSE-NEXT:    andl $858993459, %esi # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %ebx, %esi
; X86-NOSSE-NEXT:    movl %esi, %ebx
; X86-NOSSE-NEXT:    shrl $4, %ebx
; X86-NOSSE-NEXT:    addl %esi, %ebx
; X86-NOSSE-NEXT:    andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %ebx, %esi # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %esi
; X86-NOSSE-NEXT:    addl %edi, %esi
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    shrl %edi
; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edi, %edx
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %edx
; X86-NOSSE-NEXT:    andl $858993459, %edx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edi, %edx
; X86-NOSSE-NEXT:    movl %edx, %edi
; X86-NOSSE-NEXT:    shrl $4, %edi
; X86-NOSSE-NEXT:    addl %edx, %edi
; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edi, %edx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %edx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    shrl %edi
; X86-NOSSE-NEXT:    andl $1431655765, %edi # imm = 0x55555555
; X86-NOSSE-NEXT:    subl %edi, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X86-NOSSE-NEXT:    shrl $2, %ecx
; X86-NOSSE-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NOSSE-NEXT:    addl %edi, %ecx
; X86-NOSSE-NEXT:    movl %ecx, %edi
; X86-NOSSE-NEXT:    shrl $4, %edi
; X86-NOSSE-NEXT:    addl %ecx, %edi
; X86-NOSSE-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NOSSE-NEXT:    imull $16843009, %edi, %ecx # imm = 0x1010101
; X86-NOSSE-NEXT:    shrl $24, %ecx
; X86-NOSSE-NEXT:    addl %edx, %ecx
; X86-NOSSE-NEXT:    addl %esi, %ecx
; X86-NOSSE-NEXT:    xorl %edx, %edx
; X86-NOSSE-NEXT:    movl %edx, 12(%eax)
; X86-NOSSE-NEXT:    movl %edx, 8(%eax)
; X86-NOSSE-NEXT:    movl %edx, 4(%eax)
; X86-NOSSE-NEXT:    movl %ecx, (%eax)
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    popl %ebx
; X86-NOSSE-NEXT:    retl $4
;
; X64-BASE-LABEL: cnt128_pgso:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rsi
; X64-BASE-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
; X64-BASE-NEXT:    movq %rsi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rsi
; X64-BASE-NEXT:    andq %rcx, %rsi
; X64-BASE-NEXT:    addq %rsi, %rax
; X64-BASE-NEXT:    movq %rax, %rdx
; X64-BASE-NEXT:    shrq $4, %rdx
; X64-BASE-NEXT:    addq %rax, %rdx
; X64-BASE-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-BASE-NEXT:    andq %rsi, %rdx
; X64-BASE-NEXT:    movabsq $72340172838076673, %r9 # imm = 0x101010101010101
; X64-BASE-NEXT:    imulq %r9, %rdx
; X64-BASE-NEXT:    shrq $56, %rdx
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    shrq %rax
; X64-BASE-NEXT:    andq %r8, %rax
; X64-BASE-NEXT:    subq %rax, %rdi
; X64-BASE-NEXT:    movq %rdi, %rax
; X64-BASE-NEXT:    andq %rcx, %rax
; X64-BASE-NEXT:    shrq $2, %rdi
; X64-BASE-NEXT:    andq %rdi, %rcx
; X64-BASE-NEXT:    addq %rax, %rcx
; X64-BASE-NEXT:    movq %rcx, %rax
; X64-BASE-NEXT:    shrq $4, %rax
; X64-BASE-NEXT:    addq %rcx, %rax
; X64-BASE-NEXT:    andq %rsi, %rax
; X64-BASE-NEXT:    imulq %r9, %rax
; X64-BASE-NEXT:    shrq $56, %rax
; X64-BASE-NEXT:    addq %rdx, %rax
; X64-BASE-NEXT:    xorl %edx, %edx
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: cnt128_pgso:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    pushl %esi
; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %edx
; X86-POPCNT-NEXT:    addl %ecx, %edx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %ecx
; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %esi
; X86-POPCNT-NEXT:    addl %ecx, %esi
; X86-POPCNT-NEXT:    addl %edx, %esi
; X86-POPCNT-NEXT:    xorl %ecx, %ecx
; X86-POPCNT-NEXT:    movl %ecx, 12(%eax)
; X86-POPCNT-NEXT:    movl %ecx, 8(%eax)
; X86-POPCNT-NEXT:    movl %ecx, 4(%eax)
; X86-POPCNT-NEXT:    movl %esi, (%eax)
; X86-POPCNT-NEXT:    popl %esi
; X86-POPCNT-NEXT:    retl $4
;
; X64-POPCNT-LABEL: cnt128_pgso:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntq %rsi, %rcx
; X64-POPCNT-NEXT:    popcntq %rdi, %rax
; X64-POPCNT-NEXT:    addq %rcx, %rax
; X64-POPCNT-NEXT:    xorl %edx, %edx
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: cnt128_pgso:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrq %rsi, %rax
; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT:    andq %rcx, %rax
; X64-NDD-NEXT:    subq %rax, %rsi
; X64-NDD-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NDD-NEXT:    andq %rax, %rsi, %rdx
; X64-NDD-NEXT:    shrq $2, %rsi
; X64-NDD-NEXT:    andq %rax, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    shrq $4, %rdx, %rsi
; X64-NDD-NEXT:    addq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; X64-NDD-NEXT:    andq %rsi, %rdx
; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
; X64-NDD-NEXT:    imulq %r8, %rdx
; X64-NDD-NEXT:    shrq $56, %rdx
; X64-NDD-NEXT:    shrq %rdi, %r9
; X64-NDD-NEXT:    andq %r9, %rcx
; X64-NDD-NEXT:    subq %rcx, %rdi
; X64-NDD-NEXT:    andq %rax, %rdi, %rcx
; X64-NDD-NEXT:    shrq $2, %rdi
; X64-NDD-NEXT:    andq %rdi, %rax
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    shrq $4, %rax, %rcx
; X64-NDD-NEXT:    addq %rcx, %rax
; X64-NDD-NEXT:    andq %rsi, %rax
; X64-NDD-NEXT:    imulq %r8, %rax
; X64-NDD-NEXT:    shrq $56, %rax
; X64-NDD-NEXT:    addq %rdx, %rax
; X64-NDD-NEXT:    xorl %edx, %edx
; X64-NDD-NEXT:    retq
;
; X86-SSE2-LABEL: cnt128_pgso:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $1, %xmm0
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    psubb %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm0, %xmm3
; X86-SSE2-NEXT:    psrlw $2, %xmm2
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    paddb %xmm3, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    psrlw $4, %xmm4
; X86-SSE2-NEXT:    paddb %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm4
; X86-SSE2-NEXT:    movd %xmm4, %ecx
; X86-SSE2-NEXT:    movq {{.*#+}} xmm4 = mem[0],zero
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    psrlw $1, %xmm5
; X86-SSE2-NEXT:    pand %xmm1, %xmm5
; X86-SSE2-NEXT:    psubb %xmm5, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    psrlw $2, %xmm4
; X86-SSE2-NEXT:    pand %xmm0, %xmm4
; X86-SSE2-NEXT:    paddb %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm0
; X86-SSE2-NEXT:    psrlw $4, %xmm0
; X86-SSE2-NEXT:    paddb %xmm4, %xmm0
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    psadbw %xmm3, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    addl %ecx, %edx
; X86-SSE2-NEXT:    xorl %ecx, %ecx
; X86-SSE2-NEXT:    movl %ecx, 12(%eax)
; X86-SSE2-NEXT:    movl %ecx, 8(%eax)
; X86-SSE2-NEXT:    movl %ecx, 4(%eax)
; X86-SSE2-NEXT:    movl %edx, (%eax)
; X86-SSE2-NEXT:    retl $4
;
; X86-SSSE3-LABEL: cnt128_pgso:
; X86-SSSE3:       # %bb.0:
; X86-SSSE3-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm2, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm4
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm4
; X86-SSSE3-NEXT:    psrlw $4, %xmm2
; X86-SSSE3-NEXT:    pand %xmm1, %xmm2
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm2, %xmm3
; X86-SSSE3-NEXT:    paddb %xmm4, %xmm3
; X86-SSSE3-NEXT:    pxor %xmm2, %xmm2
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm3
; X86-SSSE3-NEXT:    movd %xmm3, %ecx
; X86-SSSE3-NEXT:    movq {{.*#+}} xmm3 = mem[0],zero
; X86-SSSE3-NEXT:    movdqa %xmm3, %xmm4
; X86-SSSE3-NEXT:    pand %xmm1, %xmm4
; X86-SSSE3-NEXT:    movdqa %xmm0, %xmm5
; X86-SSSE3-NEXT:    pshufb %xmm4, %xmm5
; X86-SSSE3-NEXT:    psrlw $4, %xmm3
; X86-SSSE3-NEXT:    pand %xmm1, %xmm3
; X86-SSSE3-NEXT:    pshufb %xmm3, %xmm0
; X86-SSSE3-NEXT:    paddb %xmm5, %xmm0
; X86-SSSE3-NEXT:    psadbw %xmm2, %xmm0
; X86-SSSE3-NEXT:    movd %xmm0, %edx
; X86-SSSE3-NEXT:    addl %ecx, %edx
; X86-SSSE3-NEXT:    xorl %ecx, %ecx
; X86-SSSE3-NEXT:    movl %ecx, 12(%eax)
; X86-SSSE3-NEXT:    movl %ecx, 8(%eax)
; X86-SSSE3-NEXT:    movl %ecx, 4(%eax)
; X86-SSSE3-NEXT:    movl %edx, (%eax)
; X86-SSSE3-NEXT:    retl $4
  %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
  ret i128 %cnt
}

; Population count of an i16 argument that is zero-extended to i32 *before*
; the count: zext i16 -> i32, then llvm.ctpop.i32.
;
; What the expected output below pins down:
;  - Without +popcnt, the bit-twiddling expansion is done at 32 bits, but the
;    first mask is the 16-bit constant 0x5555 while the later masks keep their
;    32-bit forms (0x33333333, 0x0F0F0F0F) and the final sum is formed with a
;    multiply by 0x01010101 and a shift right by 24.
;  - With +popcnt, a single 32-bit popcntl is emitted; the i686 variant first
;    loads the stack argument with movzwl, the x86-64 variant applies popcntl
;    directly to %edi (the argument is marked zeroext).
;  - With +ndd, the same expansion is expressed with three-operand forms.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
define i32 @popcount_zext_i32(i16 zeroext %x) {
; X86-LABEL: popcount_zext_i32:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
; X86-NEXT:    shrl $24, %eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: popcount_zext_i32:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $21845, %eax # imm = 0x5555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    andl $858993459, %eax # imm = 0x33333333
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-BASE-NEXT:    addl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-BASE-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-BASE-NEXT:    shrl $24, %eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: popcount_zext_i32:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl %eax, %eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: popcount_zext_i32:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntl %edi, %eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: popcount_zext_i32:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrl %edi, %eax
; X64-NDD-NEXT:    andl $21845, %eax # imm = 0x5555
; X64-NDD-NEXT:    subl %eax, %edi
; X64-NDD-NEXT:    andl $858993459, %edi, %eax # imm = 0x33333333
; X64-NDD-NEXT:    shrl $2, %edi
; X64-NDD-NEXT:    andl $858993459, %edi # imm = 0x33333333
; X64-NDD-NEXT:    addl %edi, %eax
; X64-NDD-NEXT:    shrl $4, %eax, %ecx
; X64-NDD-NEXT:    addl %ecx, %eax
; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; X64-NDD-NEXT:    shrl $24, %eax
; X64-NDD-NEXT:    retq
  %z = zext i16 %x to i32
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %cnt
}

; Population count performed at i16 width, with the *result* zero-extended to
; i32: llvm.ctpop.i16, then zext i16 -> i32. This is the mirror image of
; popcount_zext_i32, where the extension happens before the count.
;
; What the expected output below pins down:
;  - Without +popcnt, the expansion uses 16-bit masks throughout (0x5555,
;    0x3333, 0x0F0F) and combines the two byte sums with a shift right by 8
;    plus an add, finishing with a movzbl of the low byte (no multiply).
;  - With +popcnt, a single 32-bit popcntl is emitted, exactly as in
;    popcount_zext_i32.
;  - With +ndd, the expansion is emitted with 16-bit three-operand
;    instructions and reads the high byte via movzbl %ah.
;
; NOTE(review): in the +ndd output, the final movzwl of %ax follows a movzbl
; of %al into the same register, so the second extension looks redundant.
; That appears to be a codegen missed optimization recorded by this test, not
; an error in the test itself -- confirm before relying on it.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
define i32 @popcount_i16_zext(i16 zeroext %x) {
; X86-LABEL: popcount_i16_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl %ecx
; X86-NEXT:    andl $21845, %ecx # imm = 0x5555
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    andl $13107, %ecx # imm = 0x3333
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $13107, %eax # imm = 0x3333
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $4, %ecx
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-BASE-LABEL: popcount_i16_zext:
; X64-BASE:       # %bb.0:
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl %eax
; X64-BASE-NEXT:    andl $21845, %eax # imm = 0x5555
; X64-BASE-NEXT:    subl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    andl $13107, %eax # imm = 0x3333
; X64-BASE-NEXT:    shrl $2, %edi
; X64-BASE-NEXT:    andl $13107, %edi # imm = 0x3333
; X64-BASE-NEXT:    addl %eax, %edi
; X64-BASE-NEXT:    movl %edi, %eax
; X64-BASE-NEXT:    shrl $4, %eax
; X64-BASE-NEXT:    addl %edi, %eax
; X64-BASE-NEXT:    andl $3855, %eax # imm = 0xF0F
; X64-BASE-NEXT:    movl %eax, %ecx
; X64-BASE-NEXT:    shrl $8, %ecx
; X64-BASE-NEXT:    addl %eax, %ecx
; X64-BASE-NEXT:    movzbl %cl, %eax
; X64-BASE-NEXT:    retq
;
; X86-POPCNT-LABEL: popcount_i16_zext:
; X86-POPCNT:       # %bb.0:
; X86-POPCNT-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-POPCNT-NEXT:    popcntl %eax, %eax
; X86-POPCNT-NEXT:    retl
;
; X64-POPCNT-LABEL: popcount_i16_zext:
; X64-POPCNT:       # %bb.0:
; X64-POPCNT-NEXT:    popcntl %edi, %eax
; X64-POPCNT-NEXT:    retq
;
; X64-NDD-LABEL: popcount_i16_zext:
; X64-NDD:       # %bb.0:
; X64-NDD-NEXT:    shrw %di, %ax
; X64-NDD-NEXT:    andw $21845, %ax # imm = 0x5555
; X64-NDD-NEXT:    subw %ax, %di, %ax
; X64-NDD-NEXT:    andw $13107, %ax, %cx # imm = 0x3333
; X64-NDD-NEXT:    shrw $2, %ax
; X64-NDD-NEXT:    andw $13107, %ax # imm = 0x3333
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    shrw $4, %ax, %cx
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    andw $3855, %ax # imm = 0xF0F
; X64-NDD-NEXT:    movzbl %ah, %ecx
; X64-NDD-NEXT:    addw %cx, %ax
; X64-NDD-NEXT:    movzbl %al, %eax
; X64-NDD-NEXT:    movzwl %ax, %eax
; X64-NDD-NEXT:    retq
  %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %cnt to i32
  ret i32 %z
}

; Declarations of the population-count intrinsic at each integer width
; (i8 through i128) that this file declares for use by its test functions.
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare i128 @llvm.ctpop.i128(i128) nounwind readnone

; Module-level profile summary, attached through the "ProfileSummary" module
; flag (!0). !1 lists the summary fields: format (InstrProf), total/max
; counts, the number of counts and functions, and a detailed summary (!10)
; made up of the three entries !11-!13.
;
; !14 is a function_entry_count of 0. It is not referenced by any function in
; this part of the file; presumably it is attached via !prof to the *_pgso
; test functions earlier on so that they are treated as cold -- TODO confirm
; against their define lines.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}