; llvm/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve2,+lut,+bf16 | FileCheck %s

; LUTI2, byte elements: the single-table lane intrinsic with immediate 0 is
; expected to select one "luti2" on .b elements, with the table in z0 and the
; indices in z1, and the result returned in z0.
define <vscale x 16 x i8> @test_luti2_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti2_lane_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti2 z0.b, { z0.b }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 16 x i8> %lut
}

; LUTI2, 16-bit integer elements: the table is nxv8i16 while the indices stay
; nxv16i8. With immediate 0 the expected output is one "luti2" on .h elements.
define <vscale x 8 x i16> @test_luti2_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti2_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti2 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x i16> %lut
}

; LUTI2, half-precision elements: the nxv8f16 overload is expected to produce
; the same ".h" instruction form as the 16-bit integer overload.
define <vscale x 8 x half> @test_luti2_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti2_lane_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti2 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x half> %lut
}

; LUTI2, bfloat elements: the nxv8bf16 overload is expected to produce the
; same ".h" instruction form as the other 16-bit element overloads.
define <vscale x 8 x bfloat> @test_luti2_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti2_lane_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti2 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x bfloat> %lut
}

; LUTI4, byte elements: the single-table lane intrinsic with immediate 0 is
; expected to select one "luti4" on .b elements, with the table in z0 and the
; indices in z1.
define <vscale x 16 x i8> @test_luti4_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti4 z0.b, { z0.b }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 16 x i8> %lut
}

; LUTI4, 16-bit integer elements, single table register: with immediate 0 the
; expected output is one "luti4" on .h elements reading the { z0.h } list.
define <vscale x 8 x i16> @test_luti4_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti4 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x i16> %lut
}

; LUTI4, half-precision elements, single table register: the nxv8f16 overload
; is expected to produce the same ".h" form as the 16-bit integer overload.
define <vscale x 8 x half> @test_luti4_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti4 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x half> %lut
}

; LUTI4, bfloat elements, single table register: the nxv8bf16 overload is
; expected to produce the same ".h" form as the other 16-bit element overloads.
define <vscale x 8 x bfloat> @test_luti4_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    luti4 z0.h, { z0.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x bfloat> %lut
}

; LUTI4, 16-bit integer elements, two-register table (x2 intrinsic).
; The same %table value is supplied for both table operands. The expected
; code copies z0 into z2, then z2 into z3, and the "luti4" reads the
; { z2.h, z3.h } register list with the indices in z1.
define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_i16_x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, z0.d
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x i16> %lut
}

; LUTI4, half-precision elements, two-register table (x2 intrinsic).
; The same %table value is supplied for both table operands. The expected
; code copies z0 into z2, then z2 into z3, and the "luti4" reads the
; { z2.h, z3.h } register list with the indices in z1.
define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_f16_x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, z0.d
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x half> %lut
}

; LUTI4, bfloat elements, two-register table (x2 intrinsic).
; The same %table value is supplied for both table operands. The expected
; code copies z0 into z2, then z2 into z3, and the "luti4" reads the
; { z2.h, z3.h } register list with the indices in z1.
define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) {
; CHECK-LABEL: test_luti4_lane_bf16_x2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, z0.d
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
; CHECK-NEXT:    ret
  %lut = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
  ret <vscale x 8 x bfloat> %lut
}