; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOAVX
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16,avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX
; Codegen of i128 without cx16 is tested in atomic-nocx16.ll
; 128-bit global that the fetch_and_* tests in this file store the value
; returned by their atomicrmw into.
@var = global i128 0
; Because cmpxchg is scheduled right after isel, and because the machine
; scheduler and copy coalescer do not touch physical register live-ranges,
; we end up with a useless copy.
; 128-bit compare-and-swap (acquire on both success and failure) that returns
; the value read from memory (element 0 of the cmpxchg result pair).
; Expected lowering is a single lock cmpxchg16b: the expected value ends up in
; RDX:RAX and the replacement in RCX:RBX, so the incoming argument registers
; are shuffled first and RBX is saved/restored around the instruction.
define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rcx, %rbx
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire
%val = extractvalue { i128, i1 } %pair, 0
ret i128 %val
}
; External, 16-byte-aligned pair of i128 values. load_global_with_offset reads
; the second field, which is where the "+16" in its expected address comes from.
@cmpxchg16b_global = external dso_local global { i128, i128 }, align 16
;; Make sure we retain the offset of the global variable.
; Acquire atomic load of field 1 of @cmpxchg16b_global (byte offset 16).
; - Without AVX: the load is performed with lock cmpxchg16b after zeroing
;   RAX, RDX, RCX and RBX; the instruction leaves the current memory contents
;   in RDX:RAX, which is the i128 return value.
; - With AVX: a single aligned 16-byte vmovdqa, then the two halves are moved
;   into RAX (low) and RDX (high).
; In both variants the memory operand must be _cmpxchg16b_global+16(%rip).
; The function is nounwind, so no .cfi directives are expected.
define i128 @load_global_with_offset() nounwind {
; CHECK-NOAVX-LABEL: load_global_with_offset:
; CHECK-NOAVX: ## %bb.0: ## %entry
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: xorl %eax, %eax
; CHECK-NOAVX-NEXT: xorl %edx, %edx
; CHECK-NOAVX-NEXT: xorl %ecx, %ecx
; CHECK-NOAVX-NEXT: xorl %ebx, %ebx
; CHECK-NOAVX-NEXT: lock cmpxchg16b _cmpxchg16b_global+16(%rip)
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: load_global_with_offset:
; CHECK-AVX: ## %bb.0: ## %entry
; CHECK-AVX-NEXT: vmovdqa _cmpxchg16b_global+16(%rip), %xmm0
; CHECK-AVX-NEXT: vmovq %xmm0, %rax
; CHECK-AVX-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT: retq
entry:
%0 = load atomic i128, ptr getelementptr inbounds ({i128, i128}, ptr @cmpxchg16b_global, i64 0, i32 1) acquire, align 16
ret i128 %0
}
; Atomic 128-bit NAND (release ordering); the value returned by the atomicrmw
; is stored to @var.
; Expected lowering is a compare-exchange retry loop:
;   1. read both 64-bit halves of *%p into RDX:RAX,
;   2. compute ~(current & %bits) into RCX:RBX (and + not on each half),
;   3. lock cmpxchg16b, looping back on failure (jne).
; The high half of %bits arrives in RDX and is moved to R8 first because RDX
; is then used for the high half of the value read from memory.
define void @fetch_and_nand(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB2_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: andq %r8, %rcx
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: andq %rsi, %rbx
; CHECK-NEXT: notq %rbx
; CHECK-NEXT: notq %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB2_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw nand ptr %p, i128 %bits release
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit OR (seq_cst); the value returned by the atomicrmw is stored
; to @var.
; Expected lowering is a lock cmpxchg16b retry loop in which the new value
; (current | %bits) is formed in RCX:RBX with one orq per 64-bit half.
define void @fetch_and_or(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB3_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: orq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: orq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB3_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw or ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit ADD (seq_cst); the value returned by the atomicrmw is stored
; to @var.
; Expected lowering is a lock cmpxchg16b retry loop; the 128-bit sum is built
; in RCX:RBX with addq on the low half and adcq (add with carry) on the high
; half.
define void @fetch_and_add(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB4_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: adcq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB4_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw add ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit SUB (seq_cst); the value returned by the atomicrmw is stored
; to @var.
; Expected lowering is a lock cmpxchg16b retry loop; the 128-bit difference
; (current - %bits) is built in RCX:RBX with subq on the low half and sbbq
; (subtract with borrow) on the high half.
define void @fetch_and_sub(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB5_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rbx
; CHECK-NEXT: subq %rsi, %rbx
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: sbbq %r8, %rcx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB5_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw sub ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit signed MIN (seq_cst); the value returned by the atomicrmw is
; stored to @var.
; Expected lowering is a lock cmpxchg16b retry loop. Inside the loop the
; cmpq/sbbq pair performs a 128-bit "%bits - current" purely to set flags (the
; RCX result of sbbq is overwritten by the following movq). The new value in
; RCX:RBX starts as %bits and is replaced by the current value via cmovge,
; i.e. when %bits >= current (signed).
define void @fetch_and_min(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB6_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovgeq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovgeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB6_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw min ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit signed MAX (seq_cst); the value returned by the atomicrmw is
; stored to @var.
; Expected lowering is a lock cmpxchg16b retry loop. The cmpq/sbbq pair sets
; flags for a 128-bit "%bits - current"; the new value in RCX:RBX starts as
; %bits and is replaced by the current value via cmovl, i.e. when
; %bits < current (signed).
define void @fetch_and_max(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB7_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovlq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovlq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB7_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw max ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit unsigned MIN (seq_cst); the value returned by the atomicrmw
; is stored to @var.
; Expected lowering is a lock cmpxchg16b retry loop. The cmpq/sbbq pair sets
; flags for a 128-bit "%bits - current"; the new value in RCX:RBX starts as
; %bits and is replaced by the current value via cmovae, i.e. when
; %bits >= current (unsigned).
define void @fetch_and_umin(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB8_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovaeq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovaeq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB8_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw umin ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Atomic 128-bit unsigned MAX (seq_cst); the value returned by the atomicrmw
; is stored to @var.
; Expected lowering is a lock cmpxchg16b retry loop. The cmpq/sbbq pair sets
; flags for a 128-bit "%bits - current"; the new value in RCX:RBX starts as
; %bits and is replaced by the current value via cmovb, i.e. when
; %bits < current (unsigned).
define void @fetch_and_umax(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbx, -16
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB9_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpq %rax, %rsi
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: sbbq %rdx, %rcx
; CHECK-NEXT: movq %r8, %rcx
; CHECK-NEXT: cmovbq %rdx, %rcx
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: cmovbq %rax, %rbx
; CHECK-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NEXT: jne LBB9_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: movq %rax, _var(%rip)
; CHECK-NEXT: movq %rdx, _var+8(%rip)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%val = atomicrmw umax ptr %p, i128 %bits seq_cst
store i128 %val, ptr @var, align 16
ret void
}
; Sequentially consistent atomic load of a 16-byte-aligned i128.
; - Without AVX: performed with lock cmpxchg16b after zeroing RAX, RDX, RCX
;   and RBX; the loaded value is returned in RDX:RAX. RBX is saved/restored.
; - With AVX: a single aligned vmovdqa, then the halves are extracted into
;   RAX (low) and RDX (high). No extra fence instruction is expected.
define i128 @atomic_load_seq_cst(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_seq_cst:
; CHECK-NOAVX: ## %bb.0:
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT: .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT: xorl %eax, %eax
; CHECK-NOAVX-NEXT: xorl %edx, %edx
; CHECK-NOAVX-NEXT: xorl %ecx, %ecx
; CHECK-NOAVX-NEXT: xorl %ebx, %ebx
; CHECK-NOAVX-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: atomic_load_seq_cst:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT: vmovq %xmm0, %rax
; CHECK-AVX-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT: retq
%r = load atomic i128, ptr %p seq_cst, align 16
ret i128 %r
}
; Monotonic ("relaxed") atomic load of a 16-byte-aligned i128.
; The expected code is the same as for the seq_cst load in this file:
; lock cmpxchg16b with zeroed registers without AVX, and a single aligned
; vmovdqa followed by extraction into RAX/RDX with AVX.
define i128 @atomic_load_relaxed(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_relaxed:
; CHECK-NOAVX: ## %bb.0:
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT: .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT: xorl %eax, %eax
; CHECK-NOAVX-NEXT: xorl %edx, %edx
; CHECK-NOAVX-NEXT: xorl %ecx, %ecx
; CHECK-NOAVX-NEXT: xorl %ebx, %ebx
; CHECK-NOAVX-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: atomic_load_relaxed:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT: vmovq %xmm0, %rax
; CHECK-AVX-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT: retq
%r = load atomic i128, ptr %p monotonic, align 16
ret i128 %r
}
; Sequentially consistent atomic store of an i128 to a 16-byte-aligned address.
; - Without AVX: a lock cmpxchg16b retry loop. The value to store is placed in
;   RCX:RBX, the current memory contents are read into RDX:RAX, and the
;   exchange is retried until it succeeds. The loop block is named
;   %atomicrmw.start, which suggests the store is expanded through the same
;   path as atomicrmw.
; - With AVX: the two 64-bit halves are combined into XMM0 (vmovq x2 +
;   vpunpcklqdq) and written with one aligned vmovdqa, followed by
;   "lock orl $0" on a stack slot. That trailing locked instruction is only
;   expected for this seq_cst variant, not for the release/unordered stores.
define void @atomic_store_seq_cst(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_seq_cst:
; CHECK-NOAVX: ## %bb.0:
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT: .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT: movq %rdx, %rcx
; CHECK-NOAVX-NEXT: movq %rsi, %rbx
; CHECK-NOAVX-NEXT: movq (%rdi), %rax
; CHECK-NOAVX-NEXT: movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT: .p2align 4
; CHECK-NOAVX-NEXT: LBB12_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT: jne LBB12_1
; CHECK-NOAVX-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: atomic_store_seq_cst:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vmovq %rdx, %xmm0
; CHECK-AVX-NEXT: vmovq %rsi, %xmm1
; CHECK-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; CHECK-AVX-NEXT: retq
store atomic i128 %in, ptr %p seq_cst, align 16
ret void
}
; Release atomic store of an i128 to a 16-byte-aligned address.
; - Without AVX: a lock cmpxchg16b retry loop with the value to store in
;   RCX:RBX and the current memory contents in RDX:RAX.
; - With AVX: the two 64-bit halves are combined into XMM0 and written with a
;   single aligned vmovdqa; no trailing locked instruction is expected.
define void @atomic_store_release(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_release:
; CHECK-NOAVX: ## %bb.0:
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT: .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT: movq %rdx, %rcx
; CHECK-NOAVX-NEXT: movq %rsi, %rbx
; CHECK-NOAVX-NEXT: movq (%rdi), %rax
; CHECK-NOAVX-NEXT: movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT: .p2align 4
; CHECK-NOAVX-NEXT: LBB13_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT: jne LBB13_1
; CHECK-NOAVX-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: atomic_store_release:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vmovq %rdx, %xmm0
; CHECK-AVX-NEXT: vmovq %rsi, %xmm1
; CHECK-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT: retq
store atomic i128 %in, ptr %p release, align 16
ret void
}
; Atomic store of an i128 to a 16-byte-aligned address using the weakest
; atomic ordering.
; NOTE(review): despite the "relaxed" name, the IR below uses `unordered`,
; whereas atomic_load_relaxed uses `monotonic` - confirm this is intentional.
; - Without AVX: a lock cmpxchg16b retry loop with the value to store in
;   RCX:RBX and the current memory contents in RDX:RAX.
; - With AVX: the two 64-bit halves are combined into XMM0 and written with a
;   single aligned vmovdqa; no trailing locked instruction is expected.
define void @atomic_store_relaxed(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_relaxed:
; CHECK-NOAVX: ## %bb.0:
; CHECK-NOAVX-NEXT: pushq %rbx
; CHECK-NOAVX-NEXT: .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT: .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT: movq %rdx, %rcx
; CHECK-NOAVX-NEXT: movq %rsi, %rbx
; CHECK-NOAVX-NEXT: movq (%rdi), %rax
; CHECK-NOAVX-NEXT: movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT: .p2align 4
; CHECK-NOAVX-NEXT: LBB14_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT: lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT: jne LBB14_1
; CHECK-NOAVX-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT: popq %rbx
; CHECK-NOAVX-NEXT: retq
;
; CHECK-AVX-LABEL: atomic_store_relaxed:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vmovq %rdx, %xmm0
; CHECK-AVX-NEXT: vmovq %rsi, %xmm1
; CHECK-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT: retq
store atomic i128 %in, ptr %p unordered, align 16
ret void
}