; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; Generic llvm.exp2.f32 on an inreg (s0) float argument. Both selectors share
; the common GFX12 check prefix here. The expected code does not feed s0
; straight into v_s_exp_f32: it compares the input against 0xc2fc0000
; (-126.0), conditionally adds 0x42800000 (64.0) before the exp, and
; multiplies the result by either 0x1f800000 (2^-64) or 1.0 afterwards.
define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_exp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_add_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0
; GFX12-NEXT: v_s_exp_f32 s0, s0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.exp2.f32(float %src)
ret float %result
}
; Generic llvm.exp2.f16 on an inreg (s0) half argument. Unlike the f32
; variant, the expected code is a single v_s_exp_f16 with no extra scaling,
; followed by a copy of s0 into v0 for the return value.
define amdgpu_cs half @v_s_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_exp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.exp2.f16(half %src)
ret half %result
}
; Target intrinsic llvm.amdgcn.exp2.f32 on an inreg (s0) float argument.
; The expected code is a bare v_s_exp_f32, without the compare/add/multiply
; sequence that the checks for the generic llvm.exp2.f32 test contain.
define amdgpu_cs float @v_s_amdgcn_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.amdgcn.exp2.f32(float %src)
ret float %result
}
; Target intrinsic llvm.amdgcn.exp2.f16 on an inreg (s0) half argument.
; Expected to select a single v_s_exp_f16 for both instruction selectors.
define amdgpu_cs half @v_s_amdgcn_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_exp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.amdgcn.exp2.f16(half %src)
ret half %result
}
; Generic llvm.log2.f32 on an inreg (s0) float argument. The expected code
; wraps v_s_log_f32 in a scaling sequence: the input is compared against
; 0x800000 (2^-126), conditionally multiplied by 0x4f800000 (2^32) before
; the log, and 0x42000000 (32.0) or 0 is subtracted from the result.
define amdgpu_cs float @v_s_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_log_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT: s_sub_f32 s0, s0, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.log2.f32(float %src)
ret float %result
}
; Generic llvm.log2.f16 on an inreg (s0) half argument. Expected to select
; a single v_s_log_f16 with no input/result scaling around it.
define amdgpu_cs half @v_s_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_log_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.log2.f16(half %src)
ret half %result
}
; Target intrinsic llvm.amdgcn.log.f32 on an inreg (s0) float argument.
; The expected code is a bare v_s_log_f32, without the compare/multiply/
; subtract sequence that the checks for the generic llvm.log2.f32 test contain.
define amdgpu_cs float @v_s_amdgcn_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.amdgcn.log.f32(float %src)
ret float %result
}
; Target intrinsic llvm.amdgcn.log.f16 on an inreg (s0) half argument.
; Expected to select a single v_s_log_f16 for both instruction selectors.
define amdgpu_cs half @v_s_amdgcn_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.amdgcn.log.f16(half %src)
ret half %result
}
; Target intrinsic llvm.amdgcn.rcp.f32 (called with the `fast` flag) on an
; inreg (s0) float argument. Expected to select a single v_s_rcp_f32.
define amdgpu_cs float @v_s_rcp_f32(float inreg %src) {
; GFX12-LABEL: v_s_rcp_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rcp_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call fast float @llvm.amdgcn.rcp.f32(float %src)
ret float %result
}
; Target intrinsic llvm.amdgcn.rcp.f16 (called with the `fast` flag) on an
; inreg (s0) half argument. Expected to select a single v_s_rcp_f16.
define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
; GFX12-LABEL: v_s_rcp_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rcp_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call fast half @llvm.amdgcn.rcp.f16(half %src)
ret half %result
}
; TODO-GFX12: GlobalISel should generate v_s_rsq.
; Reciprocal square root written as `fdiv fast 1.0, (fast llvm.sqrt.f32)` on
; an inreg (s0) float argument. The two selectors currently disagree, so the
; checks are split: SelectionDAG is expected to fold the pair into one
; v_s_rsq_f32, while GlobalISel is expected to emit v_s_sqrt_f32 followed by
; v_s_rcp_f32 (the subject of the TODO above).
define amdgpu_cs float @v_s_rsq_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_rsq_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: v_s_rsq_f32 s0, s0
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_rsq_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: v_s_sqrt_f32 s0, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT: v_s_rcp_f32 s0, s0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%sqrt = call fast float @llvm.sqrt.f32(float %src)
%fdiv = fdiv fast float 1.0, %sqrt
ret float %fdiv
}
; Reciprocal square root written as `fdiv fast 1.0, (fast llvm.sqrt.f16)` on
; an inreg (s0) half argument. Unlike the f32 variant, both selectors share
; one set of checks and are expected to fold the pair into a single
; v_s_rsq_f16.
define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
; GFX12-LABEL: v_s_rsq_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_rsq_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%sqrt = call fast half @llvm.sqrt.f16(half %src)
%result = fdiv fast half 1.0, %sqrt
ret half %result
}
; TODO-GFX12: Should not use any VALU instructions.
; Generic llvm.sqrt.f32 (no fast-math flags) on an inreg (s0) float argument.
; The expected code for both selectors is a multi-instruction expansion
; around v_s_sqrt_f32 rather than a single instruction:
;   - the input is compared against 0xf800000 (2^-96) and, when smaller,
;     multiplied by 0x4f800000 (2^32) before the sqrt;
;   - the raw result is adjusted using its neighbouring bit patterns
;     (s_add_co_i32 with -1 and +1) selected by two s_fmac_f32 residuals;
;   - the result is multiplied by 0x37800000 (2^-16) when the input was
;     scaled;
;   - v_cmp_class_f32 with mask 0x260 selects the (possibly scaled) input
;     itself instead of the computed value.
; The VALU instructions referred to by the TODO above are visible in the
; checks: v_cmp_class_f32_e64 for both selectors, plus v_cndmask_b32_e64 for
; GlobalISel. The selectors also schedule the sequence differently, hence
; the separate check prefixes.
define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_sqrt_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_mul_f32 s1, s0, 0x4f800000
; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_cselect_b32 s1, s1, s0
; GFX12-SDAG-NEXT: v_s_sqrt_f32 s2, s1
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: s_mov_b32 s4, s1
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_add_co_i32 s3, s2, -1
; GFX12-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_fmac_f32 s4, s5, s2
; GFX12-SDAG-NEXT: s_mov_b32 s5, s1
; GFX12-SDAG-NEXT: s_cmp_le_f32 s4, 0
; GFX12-SDAG-NEXT: s_cselect_b32 s3, s3, s2
; GFX12-SDAG-NEXT: s_add_co_i32 s4, s2, 1
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: s_xor_b32 s6, s4, 0x80000000
; GFX12-SDAG-NEXT: s_fmac_f32 s5, s6, s2
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-SDAG-NEXT: s_cmp_gt_f32 s5, 0
; GFX12-SDAG-NEXT: s_cselect_b32 s2, s4, s3
; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT: s_mul_f32 s0, s2, 0x37800000
; GFX12-SDAG-NEXT: v_cmp_class_f32_e64 s3, s1, 0x260
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-NEXT: s_cselect_b32 s0, s0, s2
; GFX12-SDAG-NEXT: s_and_b32 s2, s3, exec_lo
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: s_cselect_b32 s0, s1, s0
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_sqrt_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xf800000
; GFX12-GISEL-NEXT: s_mul_f32 s2, s0, 0x4f800000
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_cselect_b32 s0, s2, s0
; GFX12-GISEL-NEXT: v_s_sqrt_f32 s2, s0
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
; GFX12-GISEL-NEXT: s_mov_b32 s4, s0
; GFX12-GISEL-NEXT: s_mov_b32 s6, s0
; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_add_co_i32 s3, s2, -1
; GFX12-GISEL-NEXT: s_xor_b32 s5, s3, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_fmac_f32 s4, s5, s2
; GFX12-GISEL-NEXT: s_add_co_i32 s5, s2, 1
; GFX12-GISEL-NEXT: s_xor_b32 s7, s5, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2)
; GFX12-GISEL-NEXT: s_cmp_le_f32 s4, 0
; GFX12-GISEL-NEXT: s_fmac_f32 s6, s7, s2
; GFX12-GISEL-NEXT: s_cselect_b32 s2, s3, s2
; GFX12-GISEL-NEXT: s_cmp_gt_f32 s6, 0
; GFX12-GISEL-NEXT: s_cselect_b32 s2, s5, s2
; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-GISEL-NEXT: s_mul_f32 s3, s2, 0x37800000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT: s_cselect_b32 s1, s3, s2
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s1
; GFX12-GISEL-NEXT: v_cmp_class_f32_e64 s1, s0, 0x260
; GFX12-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
; GFX12-GISEL-NEXT: ; return to shader part epilog
%result = call float @llvm.sqrt.f32(float %src)
ret float %result
}
; Generic llvm.sqrt.f16 on an inreg (s0) half argument. In contrast to the
; f32 expansion, the expected code is a single v_s_sqrt_f16 for both
; selectors.
define amdgpu_cs half @v_s_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_s_sqrt_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.sqrt.f16(half %src)
ret half %result
}
; Target intrinsic llvm.amdgcn.sqrt.f32 on an inreg (s0) float argument.
; The expected code is a bare v_s_sqrt_f32, without the scaling and
; correction sequence that the checks for the generic llvm.sqrt.f32 test contain.
define amdgpu_cs float @v_amdgcn_sqrt_f32(float inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f32 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call float @llvm.amdgcn.sqrt.f32(float %src)
ret float %result
}
; Target intrinsic llvm.amdgcn.sqrt.f16 on an inreg (s0) half argument.
; Expected to select a single v_s_sqrt_f16 for both instruction selectors.
define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_sqrt_f16 s0, s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%result = call half @llvm.amdgcn.sqrt.f16(half %src)
ret half %result
}
; llvm.fabs.f32 feeding llvm.log2.f32 on an inreg (s0) float argument.
; The expected code does not use an |abs| source modifier on v_s_log_f32:
; the sign bit is cleared up front with s_bitset0_b32 s0, 31, and the usual
; log2 f32 scaling sequence (compare with 0x800000, multiply by 0x4f800000
; or 1.0, subtract 0x42000000 or 0) then operates on that value.
define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
; GFX12-LABEL: srcmods_abs_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_bitset0_b32 s0, 31
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT: s_mul_f32 s0, s0, s1
; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX12-NEXT: v_s_log_f32 s0, s0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_sub_f32 s0, s0, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%abs = call float @llvm.fabs.f32(float %src)
%result = call float @llvm.log2.f32(float %abs)
ret float %result
}
; fneg feeding llvm.log2.f32 on an inreg (s0) float argument. Neither
; selector is expected to use a negate source modifier on v_s_log_f32; both
; flip the sign bit with s_xor_b32 ..., 0x80000000. The checks are split
; because the selectors place the negation differently:
;   - SelectionDAG keeps the original s0 for the range test and inverts the
;     comparison instead (s_cmp_gt_f32 s0, 0x80800000, i.e. against -2^-126);
;   - GlobalISel negates s0 in place and then uses the same
;     s_cmp_lt_f32 s0, 0x800000 test as the plain log2 f32 expansion.
define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
; GFX12-SDAG-LABEL: srcmods_neg_f32:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000
; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000
; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0
; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: srcmods_neg_f32:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x80000000
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1
; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1
; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: ; return to shader part epilog
%neg = fneg float %src
%result = call float @llvm.log2.f32(float %neg)
ret float %result
}
; llvm.fabs.f16 feeding llvm.log2.f16 on an inreg (s0) half argument.
; Both selectors are expected to fold the fabs into the instruction as an
; |s0| source modifier on v_s_log_f16, with no separate sign-clearing op.
define amdgpu_cs half @srcmods_abs_f16(half inreg %src) {
; GFX12-LABEL: srcmods_abs_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, |s0|
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%abs = call half @llvm.fabs.f16(half %src)
%result = call half @llvm.log2.f16(half %abs)
ret half %result
}
; fneg feeding llvm.log2.f16 on an inreg (s0) half argument. Both selectors
; are expected to fold the negation into the instruction as a -s0 source
; modifier on v_s_log_f16, with no separate sign-flipping op.
define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
; GFX12-LABEL: srcmods_neg_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_s_log_f16 s0, -s0
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%neg = fneg half %src
%result = call half @llvm.log2.f16(half %neg)
ret half %result
}
; Declarations of the intrinsics called by the test functions above, in f16
; and f32 flavours: generic exp2/log2/sqrt/fabs and the AMDGPU-specific
; exp2/log/rcp/sqrt.
declare half @llvm.exp2.f16(half)
declare float @llvm.exp2.f32(float)
declare half @llvm.amdgcn.exp2.f16(half)
declare float @llvm.amdgcn.exp2.f32(float)
declare half @llvm.log2.f16(half)
declare float @llvm.log2.f32(float)
declare half @llvm.amdgcn.log.f16(half)
declare float @llvm.amdgcn.log.f32(float)
declare half @llvm.amdgcn.rcp.f16(half)
declare float @llvm.amdgcn.rcp.f32(float)
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
declare half @llvm.amdgcn.sqrt.f16(half)
declare float @llvm.amdgcn.sqrt.f32(float)
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)