; llvm/llvm/test/CodeGen/X86/nontemporal-4.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512

; Test codegen for under-aligned nontemporal vector stores.

; XMM versions.

; Nontemporal store of the constant <2 x double> <1.0, 2.0> to a pointer that
; is only 1-byte aligned. Every llc configuration shares the same expectation:
; the 16-byte vector is written as two 8-byte movntiq stores, each fed by the
; raw bit pattern of one double materialized in rax (2.0 at offset 8, 1.0 at
; offset 0).
define void @test_constant_v2f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v2f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <2 x double> <double 1.0, double 2.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <4 x float> <1.0, 2.0, 3.0, 4.0> with
; align 1. The expected code writes the vector as two 8-byte nontemporal
; stores, each holding a pair of floats. Most configurations materialize the
; pair in rax and use movntiq; the SSE4A configuration instead loads each
; 8-byte half with movsd and stores it with movntsd.
define void @test_constant_v4f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4f32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v4f32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [2.0000004731118679E+0,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v4f32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v4f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v4f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <2 x i64> <0, 1> with align 1. The
; expected code issues one 8-byte nontemporal store per element. The zero
; element is produced with xorl %eax, %eax and stored with movntiq in every
; configuration. The element 1 is stored with movl/movntiq, except in the
; SSE4A configuration, which loads it with movsd and stores it with movntsd
; (the constant is printed there as a denormal double).
define void @test_constant_v2i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v2i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl $1, %eax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v2i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v2i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movl $1, %eax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v2i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $1, %eax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v2i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl $1, %eax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <2 x i64> <i64 0, i64 1>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <4 x i32> <0, 1, 2, 3> with align 1. The
; expected code writes two 8-byte nontemporal stores, each packing two
; adjacent i32 elements (0x100000000 for <0, 1>, 0x300000002 for <2, 3>).
; Most configurations use movabsq/movntiq; the SSE4A configuration uses
; movsd/movntsd for both halves.
define void @test_constant_v4i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v4i32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [2.1219957909652723E-314,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v4i32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v4i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v4i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $12884901890, %rax # imm = 0x300000002
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <8 x i16> <0..7> with align 1. The
; expected code writes two 8-byte nontemporal stores, each packing four
; adjacent i16 elements (0x3000200010000 and 0x7000600050004). Most
; configurations use movabsq/movntiq; the SSE4A configuration uses
; movsd/movntsd for both halves.
define void @test_constant_v8i16_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i16_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v8i16_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [4.1720559249406128E-309,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v8i16_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <16 x i8> <0..15> with align 1. The
; expected code writes two 8-byte nontemporal stores, each packing eight
; adjacent bytes (0x706050403020100 and 0xF0E0D0C0B0A0908). Most
; configurations use movabsq/movntiq; the SSE4A configuration uses
; movsd/movntsd for both halves.
define void @test_constant_v16i8_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i8_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v16i8_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [7.9499288951273625E-275,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v16i8_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $506097522914230528, %rax # imm = 0x706050403020100
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; YMM versions.

; Nontemporal store of the constant <4 x double> <-2.0, -1.0, 0.0, 1.0> with
; align 1. Every llc configuration shares the same expectation: the 32-byte
; vector is written as four 8-byte movntiq stores, one per double. The 0.0
; element (offset 16) is produced with xorl %eax, %eax; the others are
; materialized with movabsq.
define void @test_constant_v4f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v4f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <8 x float> <0.0, -0.0, -1.0, ..., -6.0>
; with align 1. The expected code writes the 32-byte vector as four 8-byte
; nontemporal stores, each holding a pair of floats. Most configurations use
; movabsq/movntiq; the SSE4A configuration uses movsd/movntsd for all four
; pieces.
define void @test_constant_v8f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8f32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v8f32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v8f32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v8f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <4 x i64> <0, -1, -2, -3> with align 1.
; The expected code issues one 8-byte nontemporal store per element. The zero
; element is produced with xorl %eax, %eax and stored with movntiq in every
; configuration. The non-zero elements use movq $imm/movntiq, except in the
; SSE4A configuration, which uses movsd/movntsd (the bit patterns are printed
; there as NaN doubles).
define void @test_constant_v4i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $-1, %rax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movq $-3, %rax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movq $-2, %rax
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v4i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v4i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq $-1, %rax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movq $-3, %rax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movq $-2, %rax
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v4i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movq $-1, %rax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movq $-3, %rax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movq $-2, %rax
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v4i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq $-1, %rax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movq $-3, %rax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movq $-2, %rax
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <8 x i32> <0, -1, ..., -7> with align 1.
; The expected code writes the 32-byte vector as four 8-byte nontemporal
; stores, each packing two adjacent i32 elements. Most configurations use
; movabsq/movntiq; the SSE4A configuration uses movsd/movntsd for all four
; pieces.
define void @test_constant_v8i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v8i32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v8i32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <8 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <16 x i16> <0, -1, ..., -15> with
; align 1. The expected code writes the 32-byte vector as four 8-byte
; nontemporal stores, each packing four adjacent i16 elements. Most
; configurations use movabsq/movntiq; the SSE4A configuration uses
; movsd/movntsd for all four pieces.
define void @test_constant_v16i16_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i16_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v16i16_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v16i16_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <16 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <32 x i8> <0, -1, ..., -31> with align 1.
; The expected code writes the 32-byte vector as four 8-byte nontemporal
; stores, each packing eight adjacent bytes. Most configurations use
; movabsq/movntiq; the SSE4A configuration uses movsd/movntsd for all four
; pieces.
define void @test_constant_v32i8_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v32i8_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v32i8_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v32i8_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    retq
  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <4 x double> <-2.0, -1.0, 0.0, 1.0> with
; align 16 (less than the 32-byte vector size). The expected code writes two
; 16-byte halves with movntps (vmovntps in the AVX and AVX512
; configurations). The low half is loaded as a constant vector; the high
; half <0.0, 1.0> is built by zeroing xmm0 with xorps and loading the upper
; 8 bytes from memory with movhps.
define void @test_constant_v4f64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v4f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v4f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v4f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    retq
  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <8 x float> <0.0, -0.0, -1.0, ..., -6.0>
; with align 16. The expected code loads each 16-byte half as a constant
; vector into xmm0 and writes it with movntps (vmovaps/vmovntps in the AVX
; and AVX512 configurations), high half first.
define void @test_constant_v8f32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <4 x i64> <0, -1, -2, -3> with align 16.
; The expected code loads each 16-byte half as a constant vector into xmm0
; and writes it with movntps (vmovaps/vmovntps in the AVX and AVX512
; configurations), high half first. The low half <0, -1> is printed
; byte-wise as eight 0 bytes followed by eight 255 bytes.
define void @test_constant_v4i64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v4i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v4i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v4i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <8 x i32> <0, -1, ..., -7> with align 16.
; The expected code loads each 16-byte half as a constant vector into xmm0
; and writes it with movntps (vmovaps/vmovntps in the AVX and AVX512
; configurations), high half first. The negative elements appear in the
; expected output as unsigned 32-bit values.
define void @test_constant_v8i32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <16 x i16> <0, -1, ..., -15> with
; align 16. The expected code loads each 16-byte half as a constant vector
; into xmm0 and writes it with movntps (vmovaps/vmovntps in the AVX and
; AVX512 configurations), high half first. The negative elements appear in
; the expected output as unsigned 16-bit values.
define void @test_constant_v16i16_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <16 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of the constant <32 x i8> <0, -1, ..., -31> with
; align 16. The expected code loads each 16-byte half as a constant vector
; into xmm0 and writes it with movntps (vmovaps/vmovntps in the AVX and
; AVX512 configurations), high half first. The negative elements appear in
; the expected output as unsigned bytes.
define void @test_constant_v32i8_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v32i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; ZMM versions.

; Nontemporal store of the constant <8 x double> <-2.0, -1.0, 0.0, ..., 5.0>
; with align 1. Every llc configuration shares the same expectation: the
; 64-byte vector is written as eight 8-byte movntiq stores, one per double.
; The 0.0 element (offset 16) is produced with xorl %eax, %eax; the others
; are materialized with movabsq.
define void @test_constant_v8f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v8f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movabsq $4617315517961601024, %rax # imm = 0x4014000000000000
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movabsq $4616189618054758400, %rax # imm = 0x4010000000000000
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <16 x float> (0.0, -0.0, -1.0 .. -14.0) to
; %dst with only 1-byte alignment. The 64-byte store is expected to become
; eight 8-byte nontemporal stores, each carrying two adjacent floats packed
; into one 64-bit value (lower-indexed float in the low 32 bits).
; The SSE2, SSE41, AVX and AVX512 runs expect movabsq + movntiq through %rax.
; The SSE4A run instead expects each 8-byte chunk to be loaded into xmm0 with
; movsd and stored with movntsd; the printed double constants there are just
; the packed float pairs reinterpreted as a double.
define void @test_constant_v16f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16f32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v16f32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-1.3107209417724609E+5,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-2.0971535092773438E+6,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v16f32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v16f32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16f32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    retq
  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <8 x i64> (0, -1, ..., -7) to %dst with
; only 1-byte alignment. The 64-byte store is expected to become eight 8-byte
; nontemporal stores, one per element.
; The SSE2, SSE41, AVX and AVX512 runs expect movq $imm + movntiq for the
; non-zero elements, with the zero element (offset 0) produced by
; xorl %eax, %eax and stored last.
; The SSE4A run expects movsd + movntsd for the non-zero elements (their bit
; patterns print as NaN when shown as doubles) and xorl + movntiq for the zero
; element, which is stored second rather than last.
define void @test_constant_v8i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $-1, %rax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movq $-3, %rax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movq $-2, %rax
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movq $-5, %rax
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movq $-4, %rax
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movq $-7, %rax
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movq $-6, %rax
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v8i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v8i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq $-1, %rax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movq $-3, %rax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movq $-2, %rax
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movq $-5, %rax
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movq $-4, %rax
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movq $-7, %rax
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movq $-6, %rax
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movq $-1, %rax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movq $-3, %rax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movq $-2, %rax
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movq $-5, %rax
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movq $-4, %rax
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movq $-7, %rax
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movq $-6, %rax
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq $-1, %rax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movq $-3, %rax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movq $-2, %rax
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movq $-5, %rax
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movq $-4, %rax
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movq $-7, %rax
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movq $-6, %rax
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <16 x i32> (0, -1, ..., -15) to %dst with
; only 1-byte alignment. The 64-byte store is expected to become eight 8-byte
; nontemporal stores, each carrying two adjacent i32 elements packed into one
; 64-bit value (lower-indexed element in the low 32 bits).
; The SSE2, SSE41, AVX and AVX512 runs expect movabsq + movntiq through %rax.
; The SSE4A run expects movsd + movntsd through xmm0; the packed integer bit
; patterns print as NaN when shown as doubles.
define void @test_constant_v16i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v16i32_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v16i32_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i32_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i32_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    retq
  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <32 x i16> (0, -1, ..., -31) to %dst with
; only 1-byte alignment. The 64-byte store is expected to become eight 8-byte
; nontemporal stores, each carrying four adjacent i16 elements packed into one
; 64-bit value (lowest-indexed element in the low 16 bits).
; The SSE2, SSE41, AVX and AVX512 runs expect movabsq + movntiq through %rax.
; The SSE4A run expects movsd + movntsd through xmm0; the printed doubles
; there are the packed integer bit patterns reinterpreted as a double.
define void @test_constant_v32i16_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v32i16_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v32i16_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-1.6853227412070812E+308,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-1.2358925997317751E+308,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v32i16_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i16_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i16_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    retq
  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <64 x i8> (0, -1, ..., -63) to %dst with
; only 1-byte alignment. The 64-byte store is expected to become eight 8-byte
; nontemporal stores, each carrying eight adjacent bytes packed into one
; 64-bit value (lowest-indexed byte in the low 8 bits).
; The SSE2, SSE41, AVX and AVX512 runs expect movabsq + movntiq through %rax.
; The SSE4A run expects movsd + movntsd through xmm0; the printed doubles
; there are the packed byte patterns reinterpreted as a double.
define void @test_constant_v64i8_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v64i8_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v64i8_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-7.1020783099933495E+124,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [-3.0595730451167367E+47,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v64i8_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v64i8_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v64i8_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    retq
  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <8 x double> (-2.0 .. 5.0) to %dst with
; 16-byte alignment. Every run configuration is expected to split the 64-byte
; store into four 16-byte (v)movntps stores from xmm0. Three of the 16-byte
; pairs are loaded whole with (v)movaps; the pair at offset 16, which is
; (0.0, 1.0) in the IR constant, is instead built by zeroing xmm0 with
; (v)xorps and filling its upper half from memory with (v)movhps, and is
; stored last.
define void @test_constant_v8f64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    retq
  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <16 x float> (0.0, -0.0, -1.0 .. -14.0) to
; %dst with 16-byte alignment. Every run configuration is expected to split
; the 64-byte store into four 16-byte pieces of four floats each, each loaded
; into xmm0 with (v)movaps and written with (v)movntps. The expected store
; order by offset is 16, 0, 48, 32.
define void @test_constant_v16f32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <8 x i64> (0, -1, ..., -7) to %dst with
; 16-byte alignment. Every run configuration is expected to split the 64-byte
; store into four 16-byte pieces of two i64 each, each loaded into xmm0 with
; (v)movaps and written with (v)movntps. The expected store order by offset is
; 16, 0, 48, 32. Element values appear unsigned in the assembly comments, and
; the (0, -1) pair at offset 0 is printed as sixteen individual bytes.
define void @test_constant_v8i64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a constant <16 x i32> (0, -1, ..., -15) to %dst with
; 16-byte alignment. Every run configuration is expected to split the 64-byte
; store into four 16-byte pieces of four i32 each, each loaded into xmm0 with
; (v)movaps and written with (v)movntps. The expected store order by offset is
; 16, 0, 48, 32. Element values appear unsigned in the assembly comments
; (e.g. -1 is printed as 4294967295).
define void @test_constant_v16i32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <32 x i16> constant (0, -1, ..., -31) to %dst
; with only 16-byte alignment. All three check prefixes below expect four
; 16-byte (v)movntps stores from xmm0, in the order 16(%rdi), (%rdi),
; 48(%rdi), 32(%rdi); the i16 elements appear as unsigned values (65535 for
; -1, and so on) in the expected constant comments.
define void @test_constant_v32i16_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v32i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <64 x i8> constant (0, -1, ..., -63) to %dst
; with only 16-byte alignment. All three check prefixes below expect four
; 16-byte (v)movntps stores from xmm0, in the order 16(%rdi), (%rdi),
; 48(%rdi), 32(%rdi); the i8 elements appear as unsigned values (255 for -1,
; and so on) in the expected constant comments.
define void @test_constant_v64i8_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v64i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v64i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v64i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <8 x double> constant (-2.0 ... 5.0) to %dst
; with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), (%rdi), 16(%rdi). The 16(%rdi) chunk (elements
;   <0.0, 1.0>) is expected to be built by zeroing xmm0 with xorps and then
;   loading its upper half from memory with movhps, not by a single movaps.
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v8f64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8f64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8f64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8f64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <16 x float> constant (0.0, -0.0, -1.0 ...
; -14.0) to %dst with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), 16(%rdi), (%rdi).
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v16f32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16f32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16f32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16f32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <8 x i64> constant (0, -1, ..., -7) to %dst
; with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), 16(%rdi), (%rdi). The expected constant comment for
;   the (%rdi) chunk (i64 0, i64 -1) is written as sixteen byte values (eight
;   0s then eight 255s) rather than as two 64-bit values.
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v8i64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8i64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <16 x i32> constant (0, -1, ..., -15) to %dst
; with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), 16(%rdi), (%rdi).
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v16i32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16i32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <32 x i16> constant (0, -1, ..., -31) to %dst
; with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), 16(%rdi), (%rdi).
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v32i16_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v32i16_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i16_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i16_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Nontemporal store of a 64-byte <64 x i8> constant (0, -1, ..., -63) to %dst
; with 32-byte alignment.
; - The SSE prefix expects four 16-byte movntps stores from xmm0, in the order
;   48(%rdi), 32(%rdi), 16(%rdi), (%rdi).
; - The AVX and AVX512 prefixes expect two 32-byte vmovntps stores from ymm0,
;   to 32(%rdi) then (%rdi), followed by vzeroupper before the return.
define void @test_constant_v64i8_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v64i8_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v64i8_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v64i8_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 32, !nontemporal !1
  ret void
}

; Metadata node referenced by the `!nontemporal !1` annotation on the stores
; in this file; it holds the single i32 value 1.
!1 = !{i32 1}