; amdgpu-simplify-libcall-trunc.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"

declare float @_Z5truncf(float)
declare <2 x float> @_Z5truncDv2_f(<2 x float>)
declare <3 x float> @_Z5truncDv3_f(<3 x float>)
declare <4 x float> @_Z5truncDv4_f(<4 x float>)
declare <8 x float> @_Z5truncDv8_f(<8 x float>)
declare <16 x float> @_Z5truncDv16_f(<16 x float>)

declare double @_Z5truncd(double)
declare <2 x double> @_Z5truncDv2_d(<2 x double>)
declare <3 x double> @_Z5truncDv3_d(<3 x double>)
declare <4 x double> @_Z5truncDv4_d(<4 x double>)
declare <8 x double> @_Z5truncDv8_d(<8 x double>)
declare <16 x double> @_Z5truncDv16_d(<16 x double>)

declare half @_Z5truncDh(half)
declare <2 x half> @_Z5truncDv2_Dh(<2 x half>)
declare <3 x half> @_Z5truncDv3_Dh(<3 x half>)
declare <4 x half> @_Z5truncDv4_Dh(<4 x half>)
declare <8 x half> @_Z5truncDv8_Dh(<8 x half>)
declare <16 x half> @_Z5truncDv16_Dh(<16 x half>)

; Scalar float: the mangled trunc call @_Z5truncf is expected to be replaced by
; a call to the @llvm.trunc.f32 intrinsic, with the tail marker kept.
; Note: the test_rint_* functions in this file exercise trunc, not rint.
define float @test_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_rint_f32
; CHECK-SAME: (float [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call float @llvm.trunc.f32(float [[ARG]])
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call float @_Z5truncf(float %arg)
  ret float %rint
}

; 2-element float vector: @_Z5truncDv2_f is expected to be replaced by
; @llvm.trunc.v2f32.
define <2 x float> @test_rint_v2f32(<2 x float> %arg) {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> [[ARG]])
; CHECK-NEXT:    ret <2 x float> [[RINT]]
;
  %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
  ret <2 x float> %rint
}

; 3-element float vector: @_Z5truncDv3_f is expected to be replaced by
; @llvm.trunc.v3f32.
define <3 x float> @test_rint_v3f32(<3 x float> %arg) {
; CHECK-LABEL: define <3 x float> @test_rint_v3f32
; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x float> @llvm.trunc.v3f32(<3 x float> [[ARG]])
; CHECK-NEXT:    ret <3 x float> [[RINT]]
;
  %rint = tail call <3 x float> @_Z5truncDv3_f(<3 x float> %arg)
  ret <3 x float> %rint
}

; 4-element float vector: @_Z5truncDv4_f is expected to be replaced by
; @llvm.trunc.v4f32.
define <4 x float> @test_rint_v4f32(<4 x float> %arg) {
; CHECK-LABEL: define <4 x float> @test_rint_v4f32
; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[ARG]])
; CHECK-NEXT:    ret <4 x float> [[RINT]]
;
  %rint = tail call <4 x float> @_Z5truncDv4_f(<4 x float> %arg)
  ret <4 x float> %rint
}

; 8-element float vector: @_Z5truncDv8_f is expected to be replaced by
; @llvm.trunc.v8f32.
define <8 x float> @test_rint_v8f32(<8 x float> %arg) {
; CHECK-LABEL: define <8 x float> @test_rint_v8f32
; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x float> @llvm.trunc.v8f32(<8 x float> [[ARG]])
; CHECK-NEXT:    ret <8 x float> [[RINT]]
;
  %rint = tail call <8 x float> @_Z5truncDv8_f(<8 x float> %arg)
  ret <8 x float> %rint
}

; 16-element float vector: @_Z5truncDv16_f is expected to be replaced by
; @llvm.trunc.v16f32.
define <16 x float> @test_rint_v16f32(<16 x float> %arg) {
; CHECK-LABEL: define <16 x float> @test_rint_v16f32
; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x float> @llvm.trunc.v16f32(<16 x float> [[ARG]])
; CHECK-NEXT:    ret <16 x float> [[RINT]]
;
  %rint = tail call <16 x float> @_Z5truncDv16_f(<16 x float> %arg)
  ret <16 x float> %rint
}

; Scalar double: @_Z5truncd is expected to be replaced by @llvm.trunc.f64.
define double @test_rint_f64(double %arg) {
; CHECK-LABEL: define double @test_rint_f64
; CHECK-SAME: (double [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call double @llvm.trunc.f64(double [[ARG]])
; CHECK-NEXT:    ret double [[RINT]]
;
  %rint = tail call double @_Z5truncd(double %arg)
  ret double %rint
}

; 2-element double vector: @_Z5truncDv2_d is expected to be replaced by
; @llvm.trunc.v2f64.
define <2 x double> @test_rint_v2f64(<2 x double> %arg) {
; CHECK-LABEL: define <2 x double> @test_rint_v2f64
; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[ARG]])
; CHECK-NEXT:    ret <2 x double> [[RINT]]
;
  %rint = tail call <2 x double> @_Z5truncDv2_d(<2 x double> %arg)
  ret <2 x double> %rint
}

; 3-element double vector: @_Z5truncDv3_d is expected to be replaced by
; @llvm.trunc.v3f64.
define <3 x double> @test_rint_v3f64(<3 x double> %arg) {
; CHECK-LABEL: define <3 x double> @test_rint_v3f64
; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x double> @llvm.trunc.v3f64(<3 x double> [[ARG]])
; CHECK-NEXT:    ret <3 x double> [[RINT]]
;
  %rint = tail call <3 x double> @_Z5truncDv3_d(<3 x double> %arg)
  ret <3 x double> %rint
}

; 4-element double vector: @_Z5truncDv4_d is expected to be replaced by
; @llvm.trunc.v4f64.
define <4 x double> @test_rint_v4f64(<4 x double> %arg) {
; CHECK-LABEL: define <4 x double> @test_rint_v4f64
; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> [[ARG]])
; CHECK-NEXT:    ret <4 x double> [[RINT]]
;
  %rint = tail call <4 x double> @_Z5truncDv4_d(<4 x double> %arg)
  ret <4 x double> %rint
}

; 8-element double vector: @_Z5truncDv8_d is expected to be replaced by
; @llvm.trunc.v8f64.
define <8 x double> @test_rint_v8f64(<8 x double> %arg) {
; CHECK-LABEL: define <8 x double> @test_rint_v8f64
; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x double> @llvm.trunc.v8f64(<8 x double> [[ARG]])
; CHECK-NEXT:    ret <8 x double> [[RINT]]
;
  %rint = tail call <8 x double> @_Z5truncDv8_d(<8 x double> %arg)
  ret <8 x double> %rint
}

; 16-element double vector: @_Z5truncDv16_d is expected to be replaced by
; @llvm.trunc.v16f64.
define <16 x double> @test_rint_v16f64(<16 x double> %arg) {
; CHECK-LABEL: define <16 x double> @test_rint_v16f64
; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x double> @llvm.trunc.v16f64(<16 x double> [[ARG]])
; CHECK-NEXT:    ret <16 x double> [[RINT]]
;
  %rint = tail call <16 x double> @_Z5truncDv16_d(<16 x double> %arg)
  ret <16 x double> %rint
}

; Scalar half: @_Z5truncDh is expected to be replaced by @llvm.trunc.f16.
define half @test_rint_f16(half %arg) {
; CHECK-LABEL: define half @test_rint_f16
; CHECK-SAME: (half [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call half @llvm.trunc.f16(half [[ARG]])
; CHECK-NEXT:    ret half [[RINT]]
;
  %rint = tail call half @_Z5truncDh(half %arg)
  ret half %rint
}

; 2-element half vector: @_Z5truncDv2_Dh is expected to be replaced by
; @llvm.trunc.v2f16.
define <2 x half> @test_rint_v2f16(<2 x half> %arg) {
; CHECK-LABEL: define <2 x half> @test_rint_v2f16
; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x half> @llvm.trunc.v2f16(<2 x half> [[ARG]])
; CHECK-NEXT:    ret <2 x half> [[RINT]]
;
  %rint = tail call <2 x half> @_Z5truncDv2_Dh(<2 x half> %arg)
  ret <2 x half> %rint
}

; 3-element half vector: @_Z5truncDv3_Dh is expected to be replaced by
; @llvm.trunc.v3f16.
define <3 x half> @test_rint_v3f16(<3 x half> %arg) {
; CHECK-LABEL: define <3 x half> @test_rint_v3f16
; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <3 x half> @llvm.trunc.v3f16(<3 x half> [[ARG]])
; CHECK-NEXT:    ret <3 x half> [[RINT]]
;
  %rint = tail call <3 x half> @_Z5truncDv3_Dh(<3 x half> %arg)
  ret <3 x half> %rint
}

; 4-element half vector: @_Z5truncDv4_Dh is expected to be replaced by
; @llvm.trunc.v4f16.
define <4 x half> @test_rint_v4f16(<4 x half> %arg) {
; CHECK-LABEL: define <4 x half> @test_rint_v4f16
; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <4 x half> @llvm.trunc.v4f16(<4 x half> [[ARG]])
; CHECK-NEXT:    ret <4 x half> [[RINT]]
;
  %rint = tail call <4 x half> @_Z5truncDv4_Dh(<4 x half> %arg)
  ret <4 x half> %rint
}

; 8-element half vector: @_Z5truncDv8_Dh is expected to be replaced by
; @llvm.trunc.v8f16.
define <8 x half> @test_rint_v8f16(<8 x half> %arg) {
; CHECK-LABEL: define <8 x half> @test_rint_v8f16
; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <8 x half> @llvm.trunc.v8f16(<8 x half> [[ARG]])
; CHECK-NEXT:    ret <8 x half> [[RINT]]
;
  %rint = tail call <8 x half> @_Z5truncDv8_Dh(<8 x half> %arg)
  ret <8 x half> %rint
}

; 16-element half vector: @_Z5truncDv16_Dh is expected to be replaced by
; @llvm.trunc.v16f16.
define <16 x half> @test_rint_v16f16(<16 x half> %arg) {
; CHECK-LABEL: define <16 x half> @test_rint_v16f16
; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <16 x half> @llvm.trunc.v16f16(<16 x half> [[ARG]])
; CHECK-NEXT:    ret <16 x half> [[RINT]]
;
  %rint = tail call <16 x half> @_Z5truncDv16_Dh(<16 x half> %arg)
  ret <16 x half> %rint
}

; The call site is marked nobuiltin (attribute group #0); the expected output
; keeps the call to @_Z5truncf instead of introducing the intrinsic.
define float @test_rint_f32_nobuiltin_callsite(float %arg) {
; CHECK-LABEL: define float @test_rint_f32_nobuiltin_callsite
; CHECK-SAME: (float [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call float @_Z5truncf(float %arg) #0
  ret float %rint
}

; Vector variant of the nobuiltin call-site test: the call to @_Z5truncDv2_f
; carries attribute group #0 and is expected to be kept as-is.
define <2 x float> @test_rint_v2f32_nobuiltin_callsite(<2 x float> %arg) {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltin_callsite
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT:    ret <2 x float> [[RINT]]
;
  %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
  ret <2 x float> %rint
}

; "no-builtins" should be ignored
; The function carries the "no-builtins" string attribute (#1) and the call
; site additionally carries nobuiltin (#0). The expected output keeps the call.
; NOTE(review): the unchanged call is presumably due to the call-site nobuiltin
; rather than the function attribute - confirm this test isolates "no-builtins".
define float @test_rint_f32_nobuiltins(float %arg) #1 {
; CHECK-LABEL: define float @test_rint_f32_nobuiltins
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[RINT:%.*]] = tail call float @_Z5truncf(float [[ARG]]) #[[ATTR4]]
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call float @_Z5truncf(float %arg) #0
  ret float %rint
}

; Vector variant: the function has the "no-builtins" string attribute (#1) and
; the call site has nobuiltin (#0). The expected output keeps the call.
; NOTE(review): as in the scalar case, the call-site nobuiltin presumably is
; what prevents the rewrite - confirm.
define <2 x float> @test_rint_v2f32_nobuiltins(<2 x float> %arg) #1 {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_nobuiltins
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[RINT:%.*]] = tail call <2 x float> @_Z5truncDv2_f(<2 x float> [[ARG]]) #[[ATTR4]]
; CHECK-NEXT:    ret <2 x float> [[RINT]]
;
  %rint = tail call <2 x float> @_Z5truncDv2_f(<2 x float> %arg) #0
  ret <2 x float> %rint
}

; The fast-math flags on the libcall (nnan ninf) are expected to be carried
; over to the replacement @llvm.trunc.f32 call.
define float @test_rint_f32_preserve_flags(float %arg) {
; CHECK-LABEL: define float @test_rint_f32_preserve_flags
; CHECK-SAME: (float [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan ninf float @llvm.trunc.f32(float [[ARG]])
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call nnan ninf float @_Z5truncf(float %arg)
  ret float %rint
}

; Vector variant: the flags contract, nsz and nnan on the libcall are expected
; on the @llvm.trunc.v2f32 call. The expected output lists the same set of
; flags in a different order (nnan nsz contract) than the input spells them.
define <2 x float> @test_rint_v2f32_preserve_flags(<2 x float> %arg) {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.trunc.v2f32(<2 x float> [[ARG]])
; CHECK-NEXT:    ret <2 x float> [[RINT]]
;
  %rint = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg)
  ret <2 x float> %rint
}

; Both the fast-math flags (nnan ninf) and the custom !foo metadata attached to
; the libcall are expected to survive on the @llvm.trunc.f32 call.
define float @test_rint_f32_preserve_flags_md(float %arg) {
; CHECK-LABEL: define float @test_rint_f32_preserve_flags_md
; CHECK-SAME: (float [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan ninf float @llvm.trunc.f32(float [[ARG]]), !foo !0
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call nnan ninf float @_Z5truncf(float %arg), !foo !0
  ret float %rint
}

; Vector variant: fast-math flags (contract nsz nnan) and the !foo metadata on
; the libcall are expected to survive on the @llvm.trunc.v2f32 call.
define <2 x float> @test_rint_v2f32_preserve_flags_md(<2 x float> %arg) {
; CHECK-LABEL: define <2 x float> @test_rint_v2f32_preserve_flags_md
; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.trunc.v2f32(<2 x float> [[ARG]]), !foo !0
; CHECK-NEXT:    ret <2 x float> [[RINT]]
;
  %rint = tail call contract nsz nnan <2 x float> @_Z5truncDv2_f(<2 x float> %arg), !foo !0
  ret <2 x float> %rint
}

; Test the libm names (truncf / trunc), which are not recognized OpenCL
; builtins: the calls are expected to be left untouched by the pass.
; This file covers trunc, so the libm trunc entry points are used here rather
; than rintf / rint. The function names keep the test_libm_rint_* spelling used
; throughout the file.
declare float @truncf(float) #2
declare double @trunc(double) #2

; Unmangled libm float variant: the call to @truncf must stay a plain call.
define float @test_libm_rint_f32(float %arg) {
; CHECK-LABEL: define float @test_libm_rint_f32
; CHECK-SAME: (float [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call float @truncf(float [[ARG]])
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call float @truncf(float %arg)
  ret float %rint
}

; Unmangled libm double variant: the call to @trunc must stay a plain call.
define double @test_libm_rint_f64(double %arg) {
; CHECK-LABEL: define double @test_libm_rint_f64
; CHECK-SAME: (double [[ARG:%.*]]) {
; CHECK-NEXT:    [[RINT:%.*]] = tail call double @trunc(double [[ARG]])
; CHECK-NEXT:    ret double [[RINT]]
;
  %rint = tail call double @trunc(double %arg)
  ret double %rint
}

; Both the function and the call site are marked strictfp (#3). The expected
; output keeps the call to @_Z5truncf, including its nnan flag, rather than
; introducing the @llvm.trunc.f32 intrinsic.
define float @test_rint_f32_strictfp(float %arg) #3 {
; CHECK-LABEL: define float @test_rint_f32_strictfp
; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[RINT:%.*]] = tail call nnan float @_Z5truncf(float [[ARG]]) #[[ATTR2]]
; CHECK-NEXT:    ret float [[RINT]]
;
  %rint = tail call nnan float @_Z5truncf(float %arg) #3
  ret float %rint
}

attributes #0 = { nobuiltin }
attributes #1 = { "no-builtins" }
attributes #2 = { nounwind memory(none) }
attributes #3 = { strictfp }

!0 = !{i32 1234}
; llvm/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-trunc.ll