; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon,+lut,+bf16 | FileCheck %s
; vluti2.lane on i8 elements: 64-bit %vn (<8 x i8>), 64-bit %vm (<8 x i8>), immediate 0.
; Expected output: one kill note per 64-bit argument ($d0, $d1), then a single
; luti2 on .16b that reads v1[0].
define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2_lane_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> %vn, <8 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti2.laneq on i8 elements: 64-bit %vn (<8 x i8>), 128-bit %vm (<16 x i8>), immediate 0.
; Expected output: a kill note for $d0 only (%vm is already 128 bits wide),
; then a single luti2 on .16b that reads v1[0].
define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2_laneq_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> %vn, <16 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti2.lane on i8 elements with a 128-bit %vn (<16 x i8>) and a 64-bit %vm
; (<8 x i8>), immediate 0.
; Expected output: a kill note for $d1 only, then a single luti2 on .16b
; that reads v1[0].
define <16 x i8> @test_luti2q_lane_i8(<16 x i8> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2q_lane_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti2.laneq on i8 elements with both operands 128 bits wide (<16 x i8>), immediate 0.
; Expected output: no kill notes, just a single luti2 on .16b that reads v1[0].
define <16 x i8> @test_luti2q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2q_laneq_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti2.lane on i16 elements: 64-bit %vn (<4 x i16>), 64-bit %vm (<8 x i8>), immediate 0.
; Expected output: one kill note per 64-bit argument ($d0, $d1), then a single
; luti2 on .8h that reads v1[0].
define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2_lane_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
  ret <8 x i16> %lut
}
; vluti2.laneq on i16 elements: 64-bit %vn (<4 x i16>), 128-bit %vm (<16 x i8>), immediate 0.
; Expected output: a kill note for $d0 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2_laneq_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> %vn, <16 x i8> %vm, i32 0)
  ret <8 x i16> %lut
}
; vluti2.lane on i16 elements with a 128-bit %vn (<8 x i16>) and a 64-bit %vm
; (<8 x i8>), immediate 0.
; %vn has the full-width type named by the intrinsic's .v8i16.v8i16 suffix, so
; this exercises the 128-bit-source form rather than repeating the 64-bit case.
; Expected output: a kill note for $d1 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x i16> @test_luti2q_lane_i16(<8 x i16> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2q_lane_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> %vn, <8 x i8> %vm, i32 0)
  ret <8 x i16> %lut
}
; vluti2.laneq on i16 elements with a 128-bit %vn (<8 x i16>) and a 128-bit %vm
; (<16 x i8>), immediate 0.
; Expected output: no kill notes, just a single luti2 on .8h that reads v1[0].
define <8 x i16> @test_luti2q_laneq_i16(<8 x i16> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2q_laneq_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> %vn, <16 x i8> %vm, i32 0)
  ret <8 x i16> %lut
}
; vluti2.lane on half elements: 64-bit %vn (<4 x half>), 64-bit %vm (<8 x i8>), immediate 0.
; Expected output: one kill note per 64-bit argument ($d0, $d1), then a single
; luti2 on .8h that reads v1[0].
define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2_lane_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> %vn, <8 x i8> %vm, i32 0)
  ret <8 x half> %lut
}
; vluti2.laneq on half elements: 64-bit %vn (<4 x half>), 128-bit %vm (<16 x i8>), immediate 0.
; The intrinsic suffix is .v8f16.v4f16 so that it names the actual result and
; %vn types.
; Expected output: a kill note for $d0 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2_laneq_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> %vn, <16 x i8> %vm, i32 0)
  ret <8 x half> %lut
}
; vluti2.lane on half elements with a 128-bit %vn (<8 x half>) and a 64-bit %vm
; (<8 x i8>), immediate 0.
; Expected output: a kill note for $d1 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x half> @test_luti2q_lane_f16(<8 x half> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2q_lane_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> %vn, <8 x i8> %vm, i32 0)
  ret <8 x half> %lut
}
; vluti2.laneq on half elements with a 128-bit %vn (<8 x half>) and a 128-bit %vm
; (<16 x i8>), immediate 0.
; Expected output: no kill notes, just a single luti2 on .8h that reads v1[0].
define <8 x half> @test_luti2q_laneq_f16(<8 x half> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2q_laneq_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> %vn, <16 x i8> %vm, i32 0)
  ret <8 x half> %lut
}
; vluti2.lane on bfloat elements: 64-bit %vn (<4 x bfloat>), 64-bit %vm (<8 x i8>),
; immediate 0.
; Expected output: one kill note per 64-bit argument ($d0, $d1), then a single
; luti2 on .8h that reads v1[0].
define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2_lane_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
  ret <8 x bfloat> %lut
}
; vluti2.laneq on bfloat elements: 64-bit %vn (<4 x bfloat>), 128-bit %vm (<16 x i8>),
; immediate 0.
; Expected output: a kill note for $d0 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2_laneq_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> %vn, <16 x i8> %vm, i32 0)
  ret <8 x bfloat> %lut
}
; vluti2.lane on bfloat elements with a 128-bit %vn (<8 x bfloat>) and a 64-bit %vm
; (<8 x i8>), immediate 0.
; %vn has the full-width type named by the intrinsic's .v8bf16.v8bf16 suffix, so
; this exercises the 128-bit-source form rather than repeating the 64-bit case.
; Expected output: a kill note for $d1 only, then a single luti2 on .8h
; that reads v1[0].
define <8 x bfloat> @test_luti2q_lane_bf16(<8 x bfloat> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti2q_lane_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> %vn, <8 x i8> %vm, i32 0)
  ret <8 x bfloat> %lut
}
; vluti2.laneq on bfloat elements with a 128-bit %vn (<8 x bfloat>) and a 128-bit %vm
; (<16 x i8>), immediate 0.
; Expected output: no kill notes, just a single luti2 on .8h that reads v1[0].
define <8 x bfloat> @test_luti2q_laneq_bf16(<8 x bfloat> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti2q_laneq_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> %vn, <16 x i8> %vm, i32 0)
  ret <8 x bfloat> %lut
}
; vluti4q.lane on i8 elements: 128-bit %vn (<16 x i8>), 64-bit %vm (<8 x i8>), immediate 0.
; Expected output: a kill note for $d1 only, then a single luti4 on .16b
; that reads v1[0].
define <16 x i8> @test_luti4q_lane_i8(<16 x i8> %vn, <8 x i8> %vm) {
; CHECK-LABEL: test_luti4q_lane_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti4q.laneq on i8 elements with both operands 128 bits wide (<16 x i8>), immediate 0.
; Expected output: no kill notes, just a single luti4 on .16b that reads v1[0].
define <16 x i8> @test_luti4q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm) {
; CHECK-LABEL: test_luti4q_laneq_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0]
; CHECK-NEXT: ret
  %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0)
  ret <16 x i8> %lut
}
; vluti4q.lane.x2 on i16 elements: two <8 x i16> inputs (%vn1, %vn2) and a 64-bit
; %vm (<8 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair and one for $d2,
; then a single luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm) {
; CHECK-LABEL: test_luti4q_lane_x2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm, i32 1)
  ret <8 x i16> %lut
}
; vluti4q.laneq.x2 on i16 elements: two <8 x i16> inputs (%vn1, %vn2) and a 128-bit
; %vm (<16 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair, then a single
; luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm) {
; CHECK-LABEL: test_luti4q_laneq_x2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm, i32 1)
  ret <8 x i16> %lut
}
; vluti4q.lane.x2 on half elements: two <8 x half> inputs (%vn1, %vn2) and a 64-bit
; %vm (<8 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair and one for $d2,
; then a single luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x half> @test_luti4q_lane_x2_f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm) {
; CHECK-LABEL: test_luti4q_lane_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm, i32 1)
  ret <8 x half> %lut
}
; vluti4q.laneq.x2 on half elements: two <8 x half> inputs (%vn1, %vn2) and a 128-bit
; %vm (<16 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair, then a single
; luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm) {
; CHECK-LABEL: test_luti4q_laneq_x2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm, i32 1)
  ret <8 x half> %lut
}
; vluti4q.laneq.x2 on bfloat elements: two <8 x bfloat> inputs (%vn1, %vn2) and a
; 128-bit %vm (<16 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair, then a single
; luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm) {
; CHECK-LABEL: test_luti4q_laneq_x2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm, i32 1)
  ret <8 x bfloat> %lut
}
; vluti4q.lane.x2 on bfloat elements: two <8 x bfloat> inputs (%vn1, %vn2) and a
; 64-bit %vm (<8 x i8>), immediate 1.
; Expected output: kill notes placing $q0/$q1 in the $q0_q1 pair and one for $d2,
; then a single luti4 over { v0.8h, v1.8h } that reads v2[1].
define <8 x bfloat> @test_luti4q_lane_x2_bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm) {
; CHECK-LABEL: test_luti4q_lane_x2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
; CHECK-NEXT: ret
  %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm, i32 1)
  ret <8 x bfloat> %lut
}