; llvm/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
;
; The four llc invocations above compile this file for -mcpu=x86-64, for
; -mcpu=x86-64 with --x86-disable-avoid-SFB, for -mcpu=core-avx2 and for
; -mcpu=skx. Their output is matched against the CHECK, DISABLED, CHECK-AVX2
; and CHECK-AVX512 assertion prefixes respectively. Only the second invocation
; passes --x86-disable-avoid-SFB; it serves as the baseline in which every
; 16-byte copy is expected as one vector load/store pair.

; ModuleID = '../testSFB/testOverlapBlocks.c'
source_filename = "../testSFB/testOverlapBlocks.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; test_overlap_1 -- stores that overlap the source of two 16-byte copies.
; A 4-byte store of 7 at A-8 precedes a 16-byte memcpy from A-16 to A. Then
; two i64 stores at A-9 and A-16 (both cover byte A-9) and a 1-byte store at
; A-1 precede a second 16-byte memcpy from A-16 to A+16.
; In the runs that do not pass --x86-disable-avoid-SFB, both copies are
; expected as narrower GPR load/store pairs (8+4+4 bytes, then 8+4+2+1+1
; bytes). With the flag, each copy is expected as one movups load/store pair.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $7, -8(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    movl -8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 8(%rdi)
; CHECK-NEXT:    movl -4(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -9(%rdi)
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb $0, -1(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, 16(%rdi)
; CHECK-NEXT:    movl -8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 24(%rdi)
; CHECK-NEXT:    movzwl -4(%rdi), %eax
; CHECK-NEXT:    movw %ax, 28(%rdi)
; CHECK-NEXT:    movzbl -2(%rdi), %eax
; CHECK-NEXT:    movb %al, 30(%rdi)
; CHECK-NEXT:    movzbl -1(%rdi), %eax
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_1:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $7, -8(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -9(%rdi)
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb $0, -1(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_1:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $7, -8(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 8(%rdi)
; CHECK-AVX2-NEXT:    movl -4(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_1:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $7, -8(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 8(%rdi)
; CHECK-AVX512-NEXT:    movl -4(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies below.
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; 4-byte store into the middle of the source range, before the first copy.
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
  store i32 7, ptr %add.ptr1, align 4
  ; First copy, 16 bytes from A-16 to A.
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  ; Two 8-byte stores at A-9 and A-16; both write byte A-9.
  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
  store i64 %conv, ptr %add.ptr2, align 8
  store i64 %conv, ptr %add.ptr, align 8
  ; 1-byte store to the last byte of the source range.
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
  store i8 0, ptr %add.ptr5, align 1
  ; Second copy, 16 bytes from A-16 to A+16.
  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; Declaration of the memcpy intrinsic called by every test function in this
; file. Operands are (destination, source, byte count as i64, i1 flag); per the
; LLVM Language Reference the trailing i1 is the volatile flag, and every call
; in this file passes false with a byte count of 16.
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1

; test_overlap_2 -- an 8-byte store followed by a 4-byte store inside the
; source range.
; An i64 store at A-16 precedes a 16-byte memcpy from A-16 to A. Then an i64
; store at A-8 and a 4-byte store of 7 at A-12 precede a second 16-byte memcpy
; from A-16 to A+16.
; In the runs that do not pass --x86-disable-avoid-SFB, the first copy is
; expected as two 8-byte GPR load/store pairs and the second as 4+4+8 bytes.
; With the flag, each copy is expected as one movups load/store pair.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_2(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rcx
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    movq -8(%rdi), %rcx
; CHECK-NEXT:    movq %rcx, 8(%rdi)
; CHECK-NEXT:    movq %rax, -8(%rdi)
; CHECK-NEXT:    movl $7, -12(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, 16(%rdi)
; CHECK-NEXT:    movl -12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 20(%rdi)
; CHECK-NEXT:    movq -8(%rdi), %rax
; CHECK-NEXT:    movq %rax, 24(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_2:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movq %rax, -8(%rdi)
; DISABLED-NEXT:    movl $7, -12(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_2:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, (%rdi)
; CHECK-AVX2-NEXT:    movq -8(%rdi), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, 8(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT:    movl $7, -12(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT:    movl -12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT:    movq -8(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_2:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, (%rdi)
; CHECK-AVX512-NEXT:    movq -8(%rdi), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, 8(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT:    movl $7, -12(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT:    movl -12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT:    movq -8(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies below.
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  %conv = sext i32 %x to i64
  ; 8-byte store to the low half of the source range, before the first copy.
  store i64 %conv, ptr %add.ptr, align 8
  ; First copy, 16 bytes from A-16 to A.
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; 8-byte store to the high half of the source range.
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -8
  store i64 %conv, ptr %add.ptr3, align 8
  ; 4-byte store at A-12, inside the low half written before the first copy.
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -12
  store i32 7, ptr %add.ptr4, align 4
  ; Second copy, 16 bytes from A-16 to A+16.
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_3 -- same shape as test_overlap_1, but the initial 4-byte store
; of 7 is at A-10 instead of A-8.
; That store precedes a 16-byte memcpy from A-16 to A. Then two i64 stores at
; A-9 and A-16 (both cover byte A-9) and a 1-byte store at A-1 precede a second
; 16-byte memcpy from A-16 to A+16.
; In the runs that do not pass --x86-disable-avoid-SFB, the first copy is
; expected as 4+2+4+4+2 byte GPR load/store pairs and the second as 8+2+4+1+1
; bytes. With the flag, each copy is expected as one movups load/store pair.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $7, -10(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rdi)
; CHECK-NEXT:    movzwl -12(%rdi), %eax
; CHECK-NEXT:    movw %ax, 4(%rdi)
; CHECK-NEXT:    movl -10(%rdi), %eax
; CHECK-NEXT:    movl %eax, 6(%rdi)
; CHECK-NEXT:    movl -6(%rdi), %eax
; CHECK-NEXT:    movl %eax, 10(%rdi)
; CHECK-NEXT:    movzwl -2(%rdi), %eax
; CHECK-NEXT:    movw %ax, 14(%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -9(%rdi)
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb $0, -1(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, 16(%rdi)
; CHECK-NEXT:    movzwl -8(%rdi), %eax
; CHECK-NEXT:    movw %ax, 24(%rdi)
; CHECK-NEXT:    movl -6(%rdi), %eax
; CHECK-NEXT:    movl %eax, 26(%rdi)
; CHECK-NEXT:    movzbl -2(%rdi), %eax
; CHECK-NEXT:    movb %al, 30(%rdi)
; CHECK-NEXT:    movzbl -1(%rdi), %eax
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_3:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $7, -10(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -9(%rdi)
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb $0, -1(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_3:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $7, -10(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rdi)
; CHECK-AVX2-NEXT:    movzwl -12(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 4(%rdi)
; CHECK-AVX2-NEXT:    movl -10(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 6(%rdi)
; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 10(%rdi)
; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 14(%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT:    movzwl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_3:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $7, -10(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rdi)
; CHECK-AVX512-NEXT:    movzwl -12(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 4(%rdi)
; CHECK-AVX512-NEXT:    movl -10(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 6(%rdi)
; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 10(%rdi)
; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 14(%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT:    movzwl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies below.
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; 4-byte store at A-10, which straddles the midpoint (A-8) of the source.
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -10
  store i32 7, ptr %add.ptr1, align 4
  ; First copy, 16 bytes from A-16 to A.
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  ; Two 8-byte stores at A-9 and A-16; both write byte A-9.
  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
  store i64 %conv, ptr %add.ptr2, align 8
  store i64 %conv, ptr %add.ptr, align 8
  ; 1-byte store to the last byte of the source range.
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
  store i8 0, ptr %add.ptr5, align 1
  ; Second copy, 16 bytes from A-16 to A+16.
  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_4 -- the first copy has no preceding store in this function.
; A 16-byte memcpy from A-16 to A comes first and is expected as one vector
; load/store pair in every configuration (movups, or vmovups in the core-avx2
; and skx runs). Then an i64 store at A-8, an i32 store of %x at A-16 and a
; 4-byte store of 0 at A-11 precede a second 16-byte memcpy from A-16 to A+16.
; In the runs that do not pass --x86-disable-avoid-SFB, the second copy is
; expected as 4+1+4+4+2+1 byte GPR load/store pairs. With the flag, it is
; expected as one movups load/store pair.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movups -16(%rdi), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -8(%rdi)
; CHECK-NEXT:    movl %eax, -16(%rdi)
; CHECK-NEXT:    movl $0, -11(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, 16(%rdi)
; CHECK-NEXT:    movzbl -12(%rdi), %eax
; CHECK-NEXT:    movb %al, 20(%rdi)
; CHECK-NEXT:    movl -11(%rdi), %eax
; CHECK-NEXT:    movl %eax, 21(%rdi)
; CHECK-NEXT:    movl -7(%rdi), %eax
; CHECK-NEXT:    movl %eax, 25(%rdi)
; CHECK-NEXT:    movzwl -3(%rdi), %eax
; CHECK-NEXT:    movw %ax, 29(%rdi)
; CHECK-NEXT:    movzbl -1(%rdi), %eax
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_4:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -8(%rdi)
; DISABLED-NEXT:    movl %eax, -16(%rdi)
; DISABLED-NEXT:    movl $0, -11(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_4:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT:    movl %eax, -16(%rdi)
; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT:    movzbl -12(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
; CHECK-AVX2-NEXT:    movl -7(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_4:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT:    movl %eax, -16(%rdi)
; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT:    movzbl -12(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
; CHECK-AVX512-NEXT:    movl -7(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies below.
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; First copy, 16 bytes from A-16 to A; no store precedes it in this function.
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  ; 8-byte store to the high half of the source range.
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
  store i64 %conv, ptr %add.ptr1, align 8
  ; 4-byte store to the start of the source range.
  store i32 %x, ptr %add.ptr, align 4
  ; 4-byte store at A-11; it covers A-11..A-8, so its last byte overlaps the
  ; 8-byte store at A-8.
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -11
  store i32 0, ptr %add.ptr3, align 4
  ; Second copy, 16 bytes from A-16 to A+16.
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr4, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_5 -- single-byte stores inside a region written by a wider
; store.
; A 16-byte memcpy from A-16 to A comes first and is expected as one vector
; load/store pair in every configuration (movups, or vmovups in the core-avx2
; and skx runs). Then an i64 store at A-16 and two 1-byte stores at A-14 and
; A-11 (both inside the bytes the i64 store wrote) precede a second 16-byte
; memcpy from A-16 to A+16.
; In the runs that do not pass --x86-disable-avoid-SFB, the second copy is
; expected as 2+1+2+1+8+2 byte GPR load/store pairs. With the flag, it is
; expected as one movups load/store pair.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movups -16(%rdi), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb %al, -14(%rdi)
; CHECK-NEXT:    movb $0, -11(%rdi)
; CHECK-NEXT:    movzwl -16(%rdi), %eax
; CHECK-NEXT:    movw %ax, 16(%rdi)
; CHECK-NEXT:    movzbl -14(%rdi), %eax
; CHECK-NEXT:    movb %al, 18(%rdi)
; CHECK-NEXT:    movzwl -13(%rdi), %eax
; CHECK-NEXT:    movw %ax, 19(%rdi)
; CHECK-NEXT:    movzbl -11(%rdi), %eax
; CHECK-NEXT:    movb %al, 21(%rdi)
; CHECK-NEXT:    movq -10(%rdi), %rax
; CHECK-NEXT:    movq %rax, 22(%rdi)
; CHECK-NEXT:    movzwl -2(%rdi), %eax
; CHECK-NEXT:    movw %ax, 30(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_5:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb %al, -14(%rdi)
; DISABLED-NEXT:    movb $0, -11(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_5:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb %al, -14(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
; CHECK-AVX2-NEXT:    movzbl -14(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
; CHECK-AVX2-NEXT:    movzbl -11(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 30(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_5:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb %al, -14(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
; CHECK-AVX512-NEXT:    movzbl -14(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
; CHECK-AVX512-NEXT:    movzbl -11(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)
; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 30(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies below.
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; First copy, 16 bytes from A-16 to A; no store precedes it in this function.
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  ; 8-byte store to the low half of the source range (A-16..A-9).
  store i64 %conv, ptr %add.ptr, align 8
  ; 1-byte store of the low byte of %x at A-14, inside the 8 bytes just written.
  %conv2 = trunc i32 %x to i8
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -14
  store i8 %conv2, ptr %add.ptr3, align 1
  ; 1-byte store of 0 at A-11, also inside the 8 bytes written above.
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -11
  store i8 0, ptr %add.ptr4, align 1
  ; Second copy, 16 bytes from A-16 to A+16.
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; Attribute group #0 is attached to each of the five test_overlap_* function
; definitions in this file. Its "target-features" string lists
; +fxsr,+mmx,+sse,+sse2,+x87.
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

; Attribute group #1 is attached to the llvm.memcpy intrinsic declaration.
attributes #1 = { argmemonly nounwind }