; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X86
; Declarations of the intrinsics exercised by the tests in this file.

; Key loading / handle encoding: loadiwkey takes three vectors and an i32
; control value; the encodekey variants return an i32 plus several vectors.
declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
; Single-vector forms: one data vector plus a pointer operand in, an i8 and
; one result vector out.
declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
; Wide forms: a pointer operand plus eight data vectors in, an i8 and eight
; result vectors out.
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
; Codegen test for @llvm.x86.loadiwkey. The function forwards its three
; vector arguments and the i32 %ctl to the intrinsic; the expected assembly
; places %ctl in EAX and issues "loadiwkey %xmm2, %xmm1".
define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
; X64-LABEL: test_loadiwkey:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: loadiwkey %xmm2, %xmm1
; X64-NEXT: retq
;
; X86-LABEL: test_loadiwkey:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: loadiwkey %xmm2, %xmm1
; X86-NEXT: retl
entry:
tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
ret void
}
; Codegen test for @llvm.x86.encodekey128. Struct elements 1-3 of the result
; are stored through %h0-%h2 and the i32 element 0 is returned. Elements 4-6
; and the pointer arguments %h3-%h5 are not used by this test.
define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5) nounwind {
; X64-LABEL: test_encodekey128_u32:
; X64: # %bb.0: # %entry
; X64-NEXT: encodekey128 %edi, %eax
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_encodekey128_u32:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: encodekey128 %eax, %eax
; X86-NEXT: vmovaps %xmm0, (%esi)
; X86-NEXT: vmovaps %xmm1, (%edx)
; X86-NEXT: vmovaps %xmm2, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
  %res = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
  ; Vector results 1..3 go to the first three output pointers.
  %v1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 1
  store <2 x i64> %v1, ptr %h0, align 16
  %v2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 2
  store <2 x i64> %v2, ptr %h1, align 16
  %v3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 3
  store <2 x i64> %v3, ptr %h2, align 16
  ; The scalar result is the function's return value.
  %ret = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 0
  ret i32 %ret
}
; Codegen test for @llvm.x86.encodekey256. Struct elements 1-4 of the result
; are stored through %h0-%h3 and the i32 element 0 is returned. Elements 5-7
; and the pointer arguments %h4-%h6 are not used by this test.
define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5, ptr nocapture readnone %h6) nounwind {
; X64-LABEL: test_encodekey256_u32:
; X64: # %bb.0: # %entry
; X64-NEXT: encodekey256 %edi, %eax
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: retq
;
; X86-LABEL: test_encodekey256_u32:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: encodekey256 %eax, %eax
; X86-NEXT: vmovaps %xmm0, (%edi)
; X86-NEXT: vmovaps %xmm1, (%esi)
; X86-NEXT: vmovaps %xmm2, (%edx)
; X86-NEXT: vmovaps %xmm3, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
entry:
  %res = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
  ; Vector results 1..4 go to the first four output pointers.
  %v1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 1
  store <2 x i64> %v1, ptr %h0, align 16
  %v2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 2
  store <2 x i64> %v2, ptr %h1, align 16
  %v3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 3
  store <2 x i64> %v3, ptr %h2, align 16
  %v4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 4
  store <2 x i64> %v4, ptr %h3, align 16
  ; The scalar result is the function's return value.
  %ret = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %res, 0
  ret i32 %ret
}
; Codegen test for @llvm.x86.aesenc128kl with the pointer operand %h passed
; in a register. The vector result is stored to %out; the i8 result, which
; the expected assembly materializes with sete, is returned.
define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesenc128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc128kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesenc128kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %res = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, ptr %h)
  %vec = extractvalue { i8, <2 x i64> } %res, 1
  store <2 x i64> %vec, ptr %out
  %ret = extractvalue { i8, <2 x i64> } %res, 0
  ret i8 %ret
}
; Codegen test for @llvm.x86.aesdec128kl with the pointer operand %h passed
; in a register. The vector result is stored to %out; the i8 result, which
; the expected assembly materializes with sete, is returned.
define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesdec128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesdec128kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdec128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesdec128kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %res = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, ptr %h)
  %vec = extractvalue { i8, <2 x i64> } %res, 1
  store <2 x i64> %vec, ptr %out
  %ret = extractvalue { i8, <2 x i64> } %res, 0
  ret i8 %ret
}
; Codegen test for @llvm.x86.aesenc256kl with the pointer operand %h passed
; in a register. The vector result is stored to %out; the i8 result, which
; the expected assembly materializes with sete, is returned.
define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesenc256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc256kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesenc256kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %res = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr %h)
  %vec = extractvalue { i8, <2 x i64> } %res, 1
  store <2 x i64> %vec, ptr %out
  %ret = extractvalue { i8, <2 x i64> } %res, 0
  ret i8 %ret
}
; Codegen test for @llvm.x86.aesdec256kl with the pointer operand %h passed
; in a register. The vector result is stored to %out; the i8 result, which
; the expected assembly materializes with sete, is returned.
define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesdec256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesdec256kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdec256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesdec256kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %res = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, ptr %h)
  %vec = extractvalue { i8, <2 x i64> } %res, 1
  store <2 x i64> %vec, ptr %out
  %ret = extractvalue { i8, <2 x i64> } %res, 0
  ret i8 %ret
}
; Codegen test for @llvm.x86.aesencwide128kl. Each of the eight vector
; results (struct elements 1-8) is stored through its own output pointer
; (%out0-%out7) so that every result register is covered; the i8 element 0
; is returned.
; NOTE(review): the stores previously all reused element 2; the assertions
; below were adjusted by hand to match the corrected IR. Please confirm by
; re-running utils/update_llc_test_checks.py.
define i8 @test_mm_aesencwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesencwide128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesencwide128kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: movaps %xmm4, (%r9)
; X64-NEXT: movaps %xmm5, (%rbx)
; X64-NEXT: movaps %xmm6, (%r11)
; X64-NEXT: movaps %xmm7, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesencwide128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesencwide128kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm2, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm3, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm4, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm5, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm6, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm7, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  ; Element N of the result is stored through %out(N-1).
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %5, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %6, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %7, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %8, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}
; Codegen test for @llvm.x86.aesdecwide128kl. Each of the eight vector
; results (struct elements 1-8) is stored through its own output pointer
; (%out0-%out7) so that every result register is covered; the i8 element 0
; is returned.
; NOTE(review): the stores previously all reused element 2; the assertions
; below were adjusted by hand to match the corrected IR. Please confirm by
; re-running utils/update_llc_test_checks.py.
define i8 @test_mm_aesdecwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesdecwide128kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: movaps %xmm4, (%r9)
; X64-NEXT: movaps %xmm5, (%rbx)
; X64-NEXT: movaps %xmm6, (%r11)
; X64-NEXT: movaps %xmm7, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesdecwide128kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm2, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm3, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm4, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm5, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm6, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm7, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  ; Element N of the result is stored through %out(N-1).
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %5, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %6, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %7, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %8, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}
; Codegen test for @llvm.x86.aesencwide256kl. Each of the eight vector
; results (struct elements 1-8) is stored through its own output pointer
; (%out0-%out7) so that every result register is covered; the i8 element 0
; is returned.
; NOTE(review): the stores previously all reused element 2; the assertions
; below were adjusted by hand to match the corrected IR. Please confirm by
; re-running utils/update_llc_test_checks.py.
define i8 @test_mm_aesencwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesencwide256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesencwide256kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: movaps %xmm4, (%r9)
; X64-NEXT: movaps %xmm5, (%rbx)
; X64-NEXT: movaps %xmm6, (%r11)
; X64-NEXT: movaps %xmm7, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesencwide256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesencwide256kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm2, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm3, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm4, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm5, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm6, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm7, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  ; Element N of the result is stored through %out(N-1).
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %5, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %6, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %7, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %8, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}
; Codegen test for @llvm.x86.aesdecwide256kl. Each of the eight vector
; results (struct elements 1-8) is stored through its own output pointer
; (%out0-%out7) so that every result register is covered; the i8 element 0
; is returned.
; NOTE(review): the stores previously all reused element 2; the assertions
; below were adjusted by hand to match the corrected IR. Please confirm by
; re-running utils/update_llc_test_checks.py.
define i8 @test_mm_aesdecwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesdecwide256kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: movaps %xmm4, (%r9)
; X64-NEXT: movaps %xmm5, (%rbx)
; X64-NEXT: movaps %xmm6, (%r11)
; X64-NEXT: movaps %xmm7, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesdecwide256kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm2, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm3, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm4, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm5, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm6, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm7, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  ; Element N of the result is stored through %out(N-1).
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %5, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %6, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %7, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %8, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}
; The remaining tests make sure an appropriate addressing mode is selected
; when the pointer operand of an intrinsic is a global: the expected assembly
; references foo directly (RIP-relative for the x86_64 runs, absolute for the
; i386 runs) instead of first loading its address into a register.
@foo = external dso_local global [64 x i8]
; Codegen test for @llvm.x86.aesenc256kl with the global @foo as the pointer
; operand. The expected assembly folds the global into the instruction's
; memory operand; the vector result is stored to %out and the i8 result is
; returned.
define i8 @test_mm_aesenc256kl_u8_global(<2 x i64> %data, ptr %out) {
; X64-LABEL: test_mm_aesenc256kl_u8_global:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc256kl foo(%rip), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc256kl_u8_global:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: aesenc256kl foo, %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %res = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr @foo)
  %vec = extractvalue { i8, <2 x i64> } %res, 1
  store <2 x i64> %vec, ptr %out
  %ret = extractvalue { i8, <2 x i64> } %res, 0
  ret i8 %ret
}
; Codegen test for @llvm.x86.aesdecwide256kl with the global @foo as the
; pointer operand. Each of the eight vector results (struct elements 1-8) is
; stored through its own output pointer (%out0-%out7) so that every result
; register is covered; the i8 element 0 is returned.
; NOTE(review): the stores previously all reused element 2; the assertions
; below were adjusted by hand to match the corrected IR. Please confirm by
; re-running utils/update_llc_test_checks.py.
define i8 @test_mm_aesdecwide256kl_u8_global(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide256kl_u8_global:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: aesdecwide256kl foo(%rip)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: movaps %xmm1, (%rsi)
; X64-NEXT: movaps %xmm2, (%rdx)
; X64-NEXT: movaps %xmm3, (%rcx)
; X64-NEXT: movaps %xmm4, (%r8)
; X64-NEXT: movaps %xmm5, (%r9)
; X64-NEXT: movaps %xmm6, (%r11)
; X64-NEXT: movaps %xmm7, (%r10)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide256kl_u8_global:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 88(%ebp), %eax
; X86-NEXT: vmovaps 8(%ebp), %xmm3
; X86-NEXT: vmovaps 24(%ebp), %xmm4
; X86-NEXT: vmovaps 40(%ebp), %xmm5
; X86-NEXT: vmovaps 56(%ebp), %xmm6
; X86-NEXT: vmovaps 72(%ebp), %xmm7
; X86-NEXT: aesdecwide256kl foo
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 92(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 96(%ebp), %eax
; X86-NEXT: vmovaps %xmm2, (%eax)
; X86-NEXT: movl 100(%ebp), %eax
; X86-NEXT: vmovaps %xmm3, (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm4, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm5, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm6, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm7, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr @foo, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  ; Element N of the result is stored through %out(N-1).
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %5, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %6, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %7, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %8, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}