; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -o - | FileCheck %s
; RUN: llc -mattr=+alu-lsl-fast --aarch64-enable-sink-fold=false < %s -o - | FileCheck %s -check-prefix=LSLFAST
target triple = "aarch64-linux"
declare void @g(...)
; Check that ADDWrs/ADDXrs with shift > 4 is considered relatively
; slow, thus CSE-d under both run lines (even with +alu-lsl-fast).
define void @f0(i1 %c0, i1 %c1, ptr %a, i64 %i) {
; CHECK-LABEL: f0:
; CHECK: // %bb.0: // %E
; CHECK-NEXT: tbz w0, #0, .LBB0_5
; CHECK-NEXT: // %bb.1: // %A
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: add x0, x2, x3, lsl #5
; CHECK-NEXT: tbz w1, #0, .LBB0_3
; CHECK-NEXT: // %bb.2: // %B
; CHECK-NEXT: bl g
; CHECK-NEXT: b .LBB0_4
; CHECK-NEXT: .LBB0_3: // %C
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: bl g
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .LBB0_5: // %X
; CHECK-NEXT: ret
;
; LSLFAST-LABEL: f0:
; LSLFAST: // %bb.0: // %E
; LSLFAST-NEXT: tbz w0, #0, .LBB0_5
; LSLFAST-NEXT: // %bb.1: // %A
; LSLFAST-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; LSLFAST-NEXT: .cfi_def_cfa_offset 16
; LSLFAST-NEXT: .cfi_offset w30, -16
; LSLFAST-NEXT: add x0, x2, x3, lsl #5
; LSLFAST-NEXT: tbz w1, #0, .LBB0_3
; LSLFAST-NEXT: // %bb.2: // %B
; LSLFAST-NEXT: bl g
; LSLFAST-NEXT: b .LBB0_4
; LSLFAST-NEXT: .LBB0_3: // %C
; LSLFAST-NEXT: mov x1, x0
; LSLFAST-NEXT: bl g
; LSLFAST-NEXT: .LBB0_4:
; LSLFAST-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; LSLFAST-NEXT: .LBB0_5: // %X
; LSLFAST-NEXT: ret
; The struct {i64,i64,i64,i64} has a 32-byte stride, so the address of
; element %i is %a + (%i << 5) -- a shifted add with shift 5 (> 4).
; %p0 (in %E) and %p1 (in %C) compute the same address; the assertions
; above show a single "add x0, x2, x3, lsl #5" under BOTH prefixes,
; i.e. the shifted add was CSE-d even with +alu-lsl-fast.
E:
%p0 = getelementptr {i64, i64, i64, i64}, ptr %a, i64 %i ; %p0 = %a + 32*%i
br i1 %c0, label %A, label %X
A:
br i1 %c1, label %B, label %C
B:
call void @g(ptr %p0) ; only %p0 is live here
br label %X
C:
%p1 = getelementptr {i64, i64, i64, i64}, ptr %a, i64 %i ; same address as %p0
call void @g(ptr %p1, ptr %p0) ; both pointers live -> CSE reuses one add
br label %X
X:
ret void
}
; Check that ADDWrs/ADDXrs with shift <= 4 is considered relatively fast on sub-targets
; with feature +alu-lsl-fast, thus *not* CSE-d there; without the feature it is still CSE-d.
define void @f1(i1 %c0, i1 %c1, ptr %a, i64 %i) {
; CHECK-LABEL: f1:
; CHECK: // %bb.0: // %E
; CHECK-NEXT: tbz w0, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %A
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: add x0, x2, x3, lsl #4
; CHECK-NEXT: tbz w1, #0, .LBB1_3
; CHECK-NEXT: // %bb.2: // %B
; CHECK-NEXT: bl g
; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: .LBB1_3: // %C
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: bl g
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .LBB1_5: // %X
; CHECK-NEXT: ret
;
; LSLFAST-LABEL: f1:
; LSLFAST: // %bb.0: // %E
; LSLFAST-NEXT: tbz w0, #0, .LBB1_5
; LSLFAST-NEXT: // %bb.1: // %A
; LSLFAST-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; LSLFAST-NEXT: .cfi_def_cfa_offset 16
; LSLFAST-NEXT: .cfi_offset w30, -16
; LSLFAST-NEXT: add x8, x2, x3, lsl #4
; LSLFAST-NEXT: tbz w1, #0, .LBB1_3
; LSLFAST-NEXT: // %bb.2: // %B
; LSLFAST-NEXT: mov x0, x8
; LSLFAST-NEXT: bl g
; LSLFAST-NEXT: b .LBB1_4
; LSLFAST-NEXT: .LBB1_3: // %C
; LSLFAST-NEXT: add x0, x2, x3, lsl #4
; LSLFAST-NEXT: mov x1, x8
; LSLFAST-NEXT: bl g
; LSLFAST-NEXT: .LBB1_4:
; LSLFAST-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; LSLFAST-NEXT: .LBB1_5: // %X
; LSLFAST-NEXT: ret
; The struct {i64,i64} has a 16-byte stride, so the address of element %i
; is %a + (%i << 4) -- a shifted add with shift 4 (<= 4).
; %p0 and %p1 compute the same address. The CHECK run (no feature) matches
; a single "add x0, x2, x3, lsl #4" (CSE-d), while LSLFAST matches TWO adds
; ("add x8, ..." and "add x0, ..." in %C): with +alu-lsl-fast the shifted
; add is cheap enough to rematerialize rather than CSE.
E:
%p0 = getelementptr {i64, i64}, ptr %a, i64 %i ; %p0 = %a + 16*%i
br i1 %c0, label %A, label %X
A:
br i1 %c1, label %B, label %C
B:
call void @g(ptr %p0) ; only %p0 is live here
br label %X
C:
%p1 = getelementptr {i64, i64}, ptr %a, i64 %i ; same address as %p0
call void @g(ptr %p1, ptr %p0) ; both pointers live at once
br label %X
X:
ret void
}