; llvm/llvm/test/CodeGen/AArch64/sshl_sat.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Signed saturating shift-left intrinsic, declared in the two overloads used by
; this file: scalar i16 and <4 x i16>. First operand is the value being
; shifted, second operand is the shift amount.
declare i16 @llvm.sshl.sat.i16(i16, i16)
declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)

; Shifted value is undef: the saturating shift is expected to collapse to the
; constant zero, i.e. (shlsat undef, x) -> 0.
define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 undef, i16 %y)
  ret i16 %res
}

; Shift amount is undef: the result itself becomes undef, so nothing but the
; return is expected, i.e. (shlsat x, undef) -> undef.
define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 %x, i16 undef)
  ret i16 %res
}

; Shifted value is poison: handled like the undef case above and expected to
; produce the constant zero, i.e. (shlsat poison, x) -> 0.
define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_poison:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 poison, i16 %y)
  ret i16 %res
}

; Shift amount is poison: the result is undefined, so only the return is
; expected, i.e. (shlsat x, poison) -> undef.
define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_poison:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 %x, i16 poison)
  ret i16 %res
}

; Shift amount equals the bit width of the type (16 for i16): the result is
; undefined, so only the return is expected, i.e. (shlsat x, bitwidth) -> undef.
define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_by_bitwidth:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 %x, i16 16)
  ret i16 %res
}

; Shifting the constant zero by any amount stays zero, i.e. (shlsat 0, x) -> 0.
define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shl_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 0, i16 %y)
  ret i16 %res
}

; A shift amount of zero is the identity: %x is returned untouched in w0, so
; only the return is expected, i.e. (shlsat x, 0) -> x.
define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_by_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call i16 @llvm.sshl.sat.i16(i16 %x, i16 0)
  ret i16 %res
}

; Both operands constant and the shift does not overflow: 8 << 2 is folded to
; the constant 32, i.e. (shlsat c1, c2) -> c3.
define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_constfold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #32 // =0x20
; CHECK-NEXT:    ret
  %folded = call i16 @llvm.sshl.sat.i16(i16 8, i16 2)
  ret i16 %folded
}

; Both operands constant and the shift overflows on the positive side:
; 8 << 15 does not fit in i16, so the fold yields the signed maximum 0x7fff,
; i.e. (shlsat c1, c2) -> sat max.
define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_satmax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #32767 // =0x7fff
; CHECK-NEXT:    ret
  %folded = call i16 @llvm.sshl.sat.i16(i16 8, i16 15)
  ret i16 %folded
}

; Both operands constant and the shift overflows on the negative side:
; -8 << 15 does not fit in i16, so the fold yields the signed minimum, whose
; i16 bit pattern is 0x8000, i.e. (shlsat c1, c2) -> sat min.
define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: combine_shlsat_satmin:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #32768 // =0x8000
; CHECK-NEXT:    ret
  %folded = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15)
  ret i16 %folded
}

; External consumer taking the four lanes as separate scalar arguments.
declare void @sink4xi16(i16, i16, i16, i16)

; Vector flavour of the constant folds: (shlsat c1, c2) -> c3 with c1/c2/c3
; all being <4 x i16> constants. The four lanes cover, in order:
;   lane 0:  8 << 2   -> 32       (no overflow)
;   lane 1:  8 << 15  -> 0x7fff   (saturates to signed max)
;   lane 2: -8 << 2   -> 0xffe0   (-32, no overflow)
;   lane 3: -8 << 15  -> 0x8000   (saturates to signed min)
define void @combine_shlsat_vector() nounwind {
; CHECK-LABEL: combine_shlsat_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    mov w0, #32 // =0x20
; CHECK-NEXT:    mov w1, #32767 // =0x7fff
; CHECK-NEXT:    mov w2, #65504 // =0xffe0
; CHECK-NEXT:    mov w3, #32768 // =0x8000
; CHECK-NEXT:    bl sink4xi16
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %folded = call <4 x i16> @llvm.sshl.sat.v4i16(
                            <4 x i16> <i16 8, i16 8, i16 -8, i16 -8>,
                            <4 x i16> <i16 2, i16 15, i16 2, i16 15>)
  ; Each lane is handed to the sink as its own scalar argument so that every
  ; folded constant shows up as a separate immediate move in the checked
  ; output.
  %lane0 = extractelement <4 x i16> %folded, i16 0
  %lane1 = extractelement <4 x i16> %folded, i16 1
  %lane2 = extractelement <4 x i16> %folded, i16 2
  %lane3 = extractelement <4 x i16> %folded, i16 3
  call void @sink4xi16(i16 %lane0, i16 %lane1, i16 %lane2, i16 %lane3)
  ret void
}

; shlsat becomes a plain shl when the shift provably cannot saturate.
; After the arithmetic shift right by 2 the value lies in [-2^13, 2^13 - 1],
; so shifting it left by 2 always fits in i16. The ashr/shl pair then reduces
; to clearing the two low bits.
define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0xfffffffc
; CHECK-NEXT:    ret
  %narrowed = ashr i16 %x, 2
  %res = call i16 @llvm.sshl.sat.i16(i16 %narrowed, i16 2)
  ret i16 %res
}

; Negative counterpart of combine_shlsat_to_shl: shlsat must NOT become shl.
; The arithmetic shift right by 2 only bounds the value to [-2^13, 2^13 - 1],
; and shifting that left by 3 can still leave the i16 range, so the full
; saturating sequence has to be emitted.
define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sxth w8, w0
; CHECK-NEXT:    mov w9, #-65536 // =0xffff0000
; CHECK-NEXT:    mov w10, #-2147483648 // =0x80000000
; CHECK-NEXT:    ands w8, w9, w8, lsl #14
; CHECK-NEXT:    cinv w10, w10, ge
; CHECK-NEXT:    lsl w9, w8, #3
; CHECK-NEXT:    cmp w8, w9, asr #3
; CHECK-NEXT:    csel w8, w10, w9, ne
; CHECK-NEXT:    asr w0, w8, #16
; CHECK-NEXT:    ret
  %narrowed = ashr i16 %x, 2
  %res = call i16 @llvm.sshl.sat.i16(i16 %narrowed, i16 3)
  ret i16 %res
}

; Vector case of the shlsat -> shl fold when saturation is impossible.
; Every lane is a sign-extended i8, i.e. within [-128, 127]; shifting that
; left by 7 stays inside the i16 range, so a plain vector shl is expected
; after the in-register sign extension.
define <4 x i16> @combine_shlsat_to_shl_vec(<4 x i8> %a) nounwind {
; CHECK-LABEL: combine_shlsat_to_shl_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    shl v0.4h, v0.4h, #7
; CHECK-NEXT:    ret
  %widened = sext <4 x i8> %a to <4 x i16>
  %res = call <4 x i16> @llvm.sshl.sat.v4i16(
                            <4 x i16> %widened,
                            <4 x i16> <i16 7, i16 7, i16 7, i16 7>)
  ret <4 x i16> %res
}