llvm/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck --check-prefix OPT %s

; Every function in this file is built for AArch64; the SVE feature itself is
; enabled per function through attribute group #0 at the end of the file.
target triple = "aarch64"

; lasta shares most of its coverage with the lastb tests further down. This
; case pins the lasta-specific corner: with an all-false predicate
; (zeroinitializer) the call must fold to an extract of lane 0.
define i8 @lasta_extractelement_0(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lasta_extractelement_0(
; OPT-NEXT:    [[LANE0:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 0
; OPT-NEXT:    ret i8 [[LANE0]]
;
  %lane0 = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %v)
  ret i8 %lane0
}

; Counterpart of lastb_extractelement_7: the same ptrue pattern (8) is used,
; but lasta picks the lane that follows the last active one, so the folded
; extract index is 8 instead of 7.
define i8 @lasta_extractelement_8(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lasta_extractelement_8(
; OPT-NEXT:    [[AFTER:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 8
; OPT-NEXT:    ret i8 [[AFTER]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 8)
  %after = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %after
}

; lastb under ptrue pattern 1 is expected to fold to an extract of lane 0.
define i8 @lastb_extractelement_0(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_0(
; OPT-NEXT:    [[LANE0:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 0
; OPT-NEXT:    ret i8 [[LANE0]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 1)
  %lane0 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane0
}

; lastb under ptrue pattern 2 is expected to fold to an extract of lane 1.
define i8 @lastb_extractelement_1(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_1(
; OPT-NEXT:    [[LANE1:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 1
; OPT-NEXT:    ret i8 [[LANE1]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 2)
  %lane1 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane1
}

; lastb under ptrue pattern 3 is expected to fold to an extract of lane 2.
define i8 @lastb_extractelement_2(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_2(
; OPT-NEXT:    [[LANE2:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 2
; OPT-NEXT:    ret i8 [[LANE2]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 3)
  %lane2 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane2
}

; lastb under ptrue pattern 4 is expected to fold to an extract of lane 3.
define i8 @lastb_extractelement_3(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_3(
; OPT-NEXT:    [[LANE3:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 3
; OPT-NEXT:    ret i8 [[LANE3]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 4)
  %lane3 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane3
}

; lastb under ptrue pattern 5 is expected to fold to an extract of lane 4.
define i8 @lastb_extractelement_4(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_4(
; OPT-NEXT:    [[LANE4:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 4
; OPT-NEXT:    ret i8 [[LANE4]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 5)
  %lane4 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane4
}

; lastb under ptrue pattern 6 is expected to fold to an extract of lane 5.
define i8 @lastb_extractelement_5(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_5(
; OPT-NEXT:    [[LANE5:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 5
; OPT-NEXT:    ret i8 [[LANE5]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 6)
  %lane5 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane5
}

; lastb under ptrue pattern 7 is expected to fold to an extract of lane 6.
define i8 @lastb_extractelement_6(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_6(
; OPT-NEXT:    [[LANE6:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 6
; OPT-NEXT:    ret i8 [[LANE6]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 7)
  %lane6 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane6
}

; lastb under ptrue pattern 8 is expected to fold to an extract of lane 7.
define i8 @lastb_extractelement_7(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_7(
; OPT-NEXT:    [[LANE7:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 7
; OPT-NEXT:    ret i8 [[LANE7]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 8)
  %lane7 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane7
}

; lastb under ptrue pattern 9 is expected to fold to an extract of lane 15,
; the highest lane index guaranteed to exist in <vscale x 16 x i8>.
define i8 @lastb_extractelement_15(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_15(
; OPT-NEXT:    [[LANE15:%.*]] = extractelement <vscale x 16 x i8> [[V:%.*]], i64 15
; OPT-NEXT:    ret i8 [[LANE15]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 9)
  %lane15 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane15
}

; Negative test: with ptrue pattern 10 the element lastb would select (lane 31)
; lies beyond the known minimum element count of the vector type, so both
; calls must be left exactly as the user wrote them.
define i8 @lastb_extractelement_31(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_31(
; OPT-NEXT:    [[PRED:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
; OPT-NEXT:    [[LANE31:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PRED]], <vscale x 16 x i8> [[V:%.*]])
; OPT-NEXT:    ret i8 [[LANE31]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %lane31 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %lane31
}

; Negative test: 15 is not a meaningful ptrue predicate pattern, so nothing can
; be inferred about which lanes are active and the calls must survive
; unchanged.
define i8 @lastb_extractelement_invalid_predicate_pattern(<vscale x 16 x i8> %v) #0 {
; OPT-LABEL: @lastb_extractelement_invalid_predicate_pattern(
; OPT-NEXT:    [[PRED:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 15)
; OPT-NEXT:    [[RES:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PRED]], <vscale x 16 x i8> [[V:%.*]])
; OPT-NEXT:    ret i8 [[RES]]
;
  %pred = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 15)
  %res = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %v)
  ret i8 %res
}

; Every lane of a splat holds the same value, so lasta of a splat is just the
; splatted scalar regardless of which lanes the predicate marks active.
define i8 @lasta_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
; OPT-LABEL: @lasta_splat(
; OPT-NEXT:    ret i8 [[A:%.*]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %picked = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %bcast)
  ret i8 %picked
}

; lastb of a splat returns the splatted scalar whatever the predicate is.
define i8 @lastb_splat(<vscale x 16 x i1> %pg, i8 %a) #0 {
; OPT-LABEL: @lastb_splat(
; OPT-NEXT:    ret i8 [[A:%.*]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %bcast)
  ret i8 %picked
}

; The lastb is hoisted above the binary operation so that the division is
; performed on scalars: lastb(vector sdiv splat(scalar)) becomes
; lastb(vector) sdiv scalar.
define i8 @lastb_binop_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_sdiv(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = sdiv i8 [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret i8 [[QUOT1]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %quot = sdiv <vscale x 16 x i8> %vector, %bcast
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %quot)
  ret i8 %picked
}

; Same hoisting as lastb_binop_RHS_splat_sdiv; the `exact` flag on the vector
; division must be carried over to the scalar division.
define i8 @lastb_binop_RHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_sdiv_exact(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = sdiv exact i8 [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret i8 [[QUOT1]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %quot = sdiv exact <vscale x 16 x i8> %vector, %bcast
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %quot)
  ret i8 %picked
}

; Floating-point variant with the splat on the right-hand side; the `fast`
; flags on the vector fdiv must be carried over to the scalar fdiv.
define float @lastb_binop_RHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float_fast(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = fdiv fast float [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret float [[QUOT1]]
;
  %seed = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %bcast = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %quot = fdiv fast <vscale x 4 x float> %vector, %bcast
  %picked = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %quot)
  ret float %picked
}

; Floating-point variant with the splat on the right-hand side and no
; fast-math flags: the scalar fdiv must not gain any flags either.
define float @lastb_binop_RHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = fdiv float [[TMP1]], [[SCALAR:%.*]]
; OPT-NEXT:    ret float [[QUOT1]]
;
  %seed = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %bcast = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %quot = fdiv <vscale x 4 x float> %vector, %bcast
  %picked = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %quot)
  ret float %picked
}

; Splat on the left-hand side: lastb(splat(scalar) sdiv vector) becomes
; scalar sdiv lastb(vector), keeping the operand order of the division.
define i8 @lastb_binop_LHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_sdiv(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = sdiv i8 [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret i8 [[QUOT1]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %quot = sdiv <vscale x 16 x i8> %bcast, %vector
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %quot)
  ret i8 %picked
}

; Splat on the left-hand side with an `exact` division: the flag must be
; carried over to the scalar sdiv and the operand order kept.
define i8 @lastb_binop_LHS_splat_sdiv_exact(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_sdiv_exact(
; OPT-NEXT:    [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = sdiv exact i8 [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret i8 [[QUOT1]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %quot = sdiv exact <vscale x 16 x i8> %bcast, %vector
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %quot)
  ret i8 %picked
}

; Floating-point variant with the splat on the left-hand side; the `fast`
; flags must be carried over to the scalar fdiv and the operand order kept.
define float @lastb_binop_LHS_splat_fdiv_float_fast(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float_fast(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = fdiv fast float [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret float [[QUOT1]]
;
  %seed = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %bcast = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %quot = fdiv fast <vscale x 4 x float> %bcast, %vector
  %picked = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %quot)
  ret float %picked
}

; Floating-point variant with the splat on the left-hand side and no
; fast-math flags: scalar fdiv of the scalar by lastb(vector).
define float @lastb_binop_LHS_splat_fdiv_float(<vscale x 4 x i1> %pg, float %scalar, <vscale x 4 x float> %vector) #0 {
; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float(
; OPT-NEXT:    [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[VECTOR:%.*]])
; OPT-NEXT:    [[QUOT1:%.*]] = fdiv float [[SCALAR:%.*]], [[TMP1]]
; OPT-NEXT:    ret float [[QUOT1]]
;
  %seed = insertelement <vscale x 4 x float> poison, float %scalar, i32 0
  %bcast = shufflevector <vscale x 4 x float> %seed, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %quot = fdiv <vscale x 4 x float> %bcast, %vector
  %picked = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %quot)
  ret float %picked
}

; Both sdiv operands are splats, so the whole expression collapses to a scalar
; division of the two splatted values and no lastb call remains.
; The insertelement indices use i32 like every other splat in this file.
define i8 @lastb_binop_LHS_RHS_splat_sdiv(<vscale x 16 x i1> %pg, i8 %scalar1, i8 %scalar2) #0 {
; OPT-LABEL: @lastb_binop_LHS_RHS_splat_sdiv(
; OPT-NEXT:    [[BINOP1:%.*]] = sdiv i8 [[SCALAR1:%.*]], [[SCALAR2:%.*]]
; OPT-NEXT:    ret i8 [[BINOP1]]
;
  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar1, i32 0
  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %splat_insert2 = insertelement <vscale x 16 x i8> poison, i8 %scalar2, i32 0
  %splat2 = shufflevector <vscale x 16 x i8> %splat_insert2, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %binop = sdiv <vscale x 16 x i8> %splat, %splat2
  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
  ret i8 %last
}

; Negative test: the vector sdiv is also passed to @use, so it has more than
; one use and the lastb must not be hoisted above it. Apart from the
; insertelement index being printed as i64, the IR is expected to come out
; unchanged.
define i8 @lastb_binop_nochange(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
; OPT-LABEL: @lastb_binop_nochange(
; OPT-NEXT:    [[SEED:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[SCALAR:%.*]], i64 0
; OPT-NEXT:    [[BCAST:%.*]] = shufflevector <vscale x 16 x i8> [[SEED]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPT-NEXT:    [[QUOT:%.*]] = sdiv <vscale x 16 x i8> [[VECTOR:%.*]], [[BCAST]]
; OPT-NEXT:    [[PICKED:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[QUOT]])
; OPT-NEXT:    call void @use(<vscale x 16 x i8> [[QUOT]])
; OPT-NEXT:    ret i8 [[PICKED]]
;
  %seed = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
  %bcast = shufflevector <vscale x 16 x i8> %seed, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %quot = sdiv <vscale x 16 x i8> %vector, %bcast
  %picked = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %quot)
  call void @use(<vscale x 16 x i8> %quot)
  ret i8 %picked
}

; External sink; lastb_binop_nochange calls it to give the vector binary
; operation a second use.
declare void @use(<vscale x 16 x i8>)
; SVE intrinsics exercised by the tests: predicate construction (ptrue) and the
; lasta/lastb element selection for i8 and float vectors.
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)

; Attribute group attached to every test function; it enables the SVE target
; feature.
attributes #0 = { "target-features"="+sve" }