; llvm/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s

; llvm.exp2.f32 on an inreg (SGPR) operand. The expected code wraps v_s_exp_f32
; in a range-scaling sequence: when the input is below 0xc2fc0000 (-126.0) it
; adds 0x42800000 (64.0) before the exp and multiplies the result by
; 0x1f800000 (2^-64) afterwards; otherwise it adds 0 and multiplies by 1.0.
; The scalar result is then copied to v0 for the return.
define amdgpu_cs float @v_s_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_exp_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_cmp_lt_f32 s0, 0xc2fc0000
; GFX12-NEXT:    s_cselect_b32 s1, 0x42800000, 0
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT:    s_add_f32 s0, s0, s1
; GFX12-NEXT:    s_cselect_b32 s1, 0x1f800000, 1.0
; GFX12-NEXT:    v_s_exp_f32 s0, s0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT:    s_mul_f32 s0, s0, s1
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call float @llvm.exp2.f32(float %src)
  ret float %result
}

; llvm.exp2.f16 on an inreg (SGPR) operand: both selectors are expected to emit
; a single v_s_exp_f16 with no surrounding scaling code, then copy s0 to v0.
define amdgpu_cs half @v_s_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_exp_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_exp_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call half @llvm.exp2.f16(half %src)
  ret half %result
}

; Target intrinsic llvm.amdgcn.exp2.f32 on an inreg (SGPR) operand: expected to
; select directly to one v_s_exp_f32, without the compare/scale sequence that
; the generic llvm.exp2.f32 test in this file produces.
define amdgpu_cs float @v_s_amdgcn_exp_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_exp_f32 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call float @llvm.amdgcn.exp2.f32(float %src)
  ret float %result
}

; Target intrinsic llvm.amdgcn.exp2.f16 on an inreg (SGPR) operand: expected to
; select to a single v_s_exp_f16 followed by the copy of s0 to v0.
define amdgpu_cs half @v_s_amdgcn_exp_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_exp_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_exp_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call half @llvm.amdgcn.exp2.f16(half %src)
  ret half %result
}

; llvm.log2.f32 on an inreg (SGPR) operand. The expected code wraps v_s_log_f32
; in a range-scaling sequence: when the input is below 0x800000 (2^-126) it is
; multiplied by 0x4f800000 (2^32) first and 0x42000000 (32.0) is subtracted
; from the result; otherwise the multiplier is 1.0 and the subtrahend is 0.
; The scalar result is then copied to v0 for the return.
define amdgpu_cs float @v_s_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_log_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT:    s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT:    s_mul_f32 s0, s0, s1
; GFX12-NEXT:    s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT:    v_s_log_f32 s0, s0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-NEXT:    s_sub_f32 s0, s0, s1
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call float @llvm.log2.f32(float %src)
  ret float %result
}

; llvm.log2.f16 on an inreg (SGPR) operand: both selectors are expected to emit
; a single v_s_log_f16 with no surrounding scaling code, then copy s0 to v0.
define amdgpu_cs half @v_s_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_log_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_log_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call half @llvm.log2.f16(half %src)
  ret half %result
}

; Target intrinsic llvm.amdgcn.log.f32 on an inreg (SGPR) operand: expected to
; select directly to one v_s_log_f32, without the compare/scale sequence that
; the generic llvm.log2.f32 test in this file produces.
define amdgpu_cs float @v_s_amdgcn_log_f32(float inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_log_f32 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call float @llvm.amdgcn.log.f32(float %src)
  ret float %result
}

; Target intrinsic llvm.amdgcn.log.f16 on an inreg (SGPR) operand: expected to
; select to a single v_s_log_f16 followed by the copy of s0 to v0.
define amdgpu_cs half @v_s_amdgcn_log_f16(half inreg %src) {
; GFX12-LABEL: v_s_amdgcn_log_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_log_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call half @llvm.amdgcn.log.f16(half %src)
  ret half %result
}

; llvm.amdgcn.rcp.f32 (called with the `fast` flags) on an inreg (SGPR)
; operand: expected to select to a single v_s_rcp_f32, then copy s0 to v0.
define amdgpu_cs float @v_s_rcp_f32(float inreg %src) {
; GFX12-LABEL: v_s_rcp_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_rcp_f32 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call fast float @llvm.amdgcn.rcp.f32(float %src)
  ret float %result
}

; llvm.amdgcn.rcp.f16 (called with the `fast` flags) on an inreg (SGPR)
; operand: expected to select to a single v_s_rcp_f16, then copy s0 to v0.
define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
; GFX12-LABEL: v_s_rcp_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_rcp_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call fast half @llvm.amdgcn.rcp.f16(half %src)
  ret half %result
}

; Reciprocal square root written as a fast llvm.sqrt.f32 followed by a fast
; `fdiv 1.0, x` on an inreg (SGPR) operand. The two selectors currently differ:
; SelectionDAG folds the pair into one v_s_rsq_f32, while GlobalISel emits
; v_s_sqrt_f32 followed by v_s_rcp_f32, hence the separate check prefixes.
; TODO-GFX12: GlobalISel should generate v_s_rsq.
define amdgpu_cs float @v_s_rsq_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_rsq_f32:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    v_s_rsq_f32 s0, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_rsq_f32:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    v_s_sqrt_f32 s0, s0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT:    v_s_rcp_f32 s0, s0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %sqrt = call fast float @llvm.sqrt.f32(float %src)
  %fdiv = fdiv fast float 1.0, %sqrt
  ret float %fdiv
}

; Half-precision reciprocal square root written as a fast llvm.sqrt.f16
; followed by a fast `fdiv 1.0, x` on an inreg (SGPR) operand. Both selectors
; share one check prefix here and are expected to fold the pair into a single
; v_s_rsq_f16.
define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
; GFX12-LABEL: v_s_rsq_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_rsq_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %sqrt = call fast half @llvm.sqrt.f16(half %src)
  %result = fdiv fast half 1.0, %sqrt
  ret half %result
}

; Generic llvm.sqrt.f32 (no fast-math flags) on an inreg (SGPR) operand. Both
; selectors expand it into: an input pre-scale by 0x4f800000 (2^32) when the
; input is below 0xf800000, a v_s_sqrt_f32, two s_fmac_f32-based residual
; checks that pick between the result and its integer neighbours (result - 1,
; result + 1), a post-scale by 0x37800000 (2^-16) when the pre-scale was
; applied, and a final class test (mask 0x260) that returns the scaled input
; itself instead of the computed root.
; Apart from the final copy to v0, the checks currently contain the VALU
; v_cmp_class_f32_e64 in both selectors and v_cndmask_b32_e64 under GlobalISel.
; TODO-GFX12: Should not use any VALU instructions.
define amdgpu_cs float @v_s_sqrt_f32(float inreg %src) {
; GFX12-SDAG-LABEL: v_s_sqrt_f32:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_mul_f32 s1, s0, 0x4f800000
; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_cselect_b32 s1, s1, s0
; GFX12-SDAG-NEXT:    v_s_sqrt_f32 s2, s1
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_mov_b32 s4, s1
; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_add_co_i32 s3, s2, -1
; GFX12-SDAG-NEXT:    s_xor_b32 s5, s3, 0x80000000
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT:    s_fmac_f32 s4, s5, s2
; GFX12-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX12-SDAG-NEXT:    s_cmp_le_f32 s4, 0
; GFX12-SDAG-NEXT:    s_cselect_b32 s3, s3, s2
; GFX12-SDAG-NEXT:    s_add_co_i32 s4, s2, 1
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_xor_b32 s6, s4, 0x80000000
; GFX12-SDAG-NEXT:    s_fmac_f32 s5, s6, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-SDAG-NEXT:    s_cmp_gt_f32 s5, 0
; GFX12-SDAG-NEXT:    s_cselect_b32 s2, s4, s3
; GFX12-SDAG-NEXT:    s_cmp_lt_f32 s0, 0xf800000
; GFX12-SDAG-NEXT:    s_mul_f32 s0, s2, 0x37800000
; GFX12-SDAG-NEXT:    v_cmp_class_f32_e64 s3, s1, 0x260
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-NEXT:    s_cselect_b32 s0, s0, s2
; GFX12-SDAG-NEXT:    s_and_b32 s2, s3, exec_lo
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_cselect_b32 s0, s1, s0
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: v_s_sqrt_f32:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_cmp_lt_f32 s0, 0xf800000
; GFX12-GISEL-NEXT:    s_mul_f32 s2, s0, 0x4f800000
; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 1, 0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    s_cselect_b32 s0, s2, s0
; GFX12-GISEL-NEXT:    v_s_sqrt_f32 s2, s0
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_mov_b32 s4, s0
; GFX12-GISEL-NEXT:    s_mov_b32 s6, s0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    s_add_co_i32 s3, s2, -1
; GFX12-GISEL-NEXT:    s_xor_b32 s5, s3, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    s_fmac_f32 s4, s5, s2
; GFX12-GISEL-NEXT:    s_add_co_i32 s5, s2, 1
; GFX12-GISEL-NEXT:    s_xor_b32 s7, s5, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_2)
; GFX12-GISEL-NEXT:    s_cmp_le_f32 s4, 0
; GFX12-GISEL-NEXT:    s_fmac_f32 s6, s7, s2
; GFX12-GISEL-NEXT:    s_cselect_b32 s2, s3, s2
; GFX12-GISEL-NEXT:    s_cmp_gt_f32 s6, 0
; GFX12-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
; GFX12-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
; GFX12-GISEL-NEXT:    s_mul_f32 s3, s2, 0x37800000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-GISEL-NEXT:    s_cselect_b32 s1, s3, s2
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s1
; GFX12-GISEL-NEXT:    v_cmp_class_f32_e64 s1, s0, 0x260
; GFX12-GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, s0, s1
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %result = call float @llvm.sqrt.f32(float %src)
  ret float %result
}

; Generic llvm.sqrt.f16 on an inreg (SGPR) operand: unlike the f32 case, both
; selectors are expected to emit just one v_s_sqrt_f16 and the copy to v0.
define amdgpu_cs half @v_s_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_s_sqrt_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_sqrt_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %result = call half @llvm.sqrt.f16(half %src)
  ret half %result
}

; Target intrinsic llvm.amdgcn.sqrt.f32 on an inreg (SGPR) operand: expected to
; select directly to one v_s_sqrt_f32, without the scaling/refinement sequence
; that the generic llvm.sqrt.f32 test in this file produces.
define amdgpu_cs float @v_amdgcn_sqrt_f32(float inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_sqrt_f32 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %sqrt = call float @llvm.amdgcn.sqrt.f32(float %src)
  ret float %sqrt
}

; Target intrinsic llvm.amdgcn.sqrt.f16 on an inreg (SGPR) operand: expected to
; select to a single v_s_sqrt_f16 followed by the copy of s0 to v0.
define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) {
; GFX12-LABEL: v_amdgcn_sqrt_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_sqrt_f16 s0, s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %sqrt = call half @llvm.amdgcn.sqrt.f16(half %src)
  ret half %sqrt
}

; llvm.fabs.f32 feeding llvm.log2.f32 on an inreg (SGPR) operand. In the
; expected code the absolute value is materialised with s_bitset0_b32 on bit 31
; before the log2 scaling sequence; v_s_log_f32 itself carries no |...| source
; modifier.
define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
; GFX12-LABEL: srcmods_abs_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_bitset0_b32 s0, 31
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lt_f32 s0, 0x800000
; GFX12-NEXT:    s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-NEXT:    s_mul_f32 s0, s0, s1
; GFX12-NEXT:    s_cselect_b32 s1, 0x42000000, 0
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX12-NEXT:    v_s_log_f32 s0, s0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_sub_f32 s0, s0, s1
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %abs = call float @llvm.fabs.f32(float %src)
  %result = call float @llvm.log2.f32(float %abs)
  ret float %result
}

; fneg feeding llvm.log2.f32 on an inreg (SGPR) operand. Neither selector puts
; a negation source modifier on v_s_log_f32; the sign flip is an s_xor_b32 with
; 0x80000000. The selectors differ in the range compare: SelectionDAG compares
; the un-negated input with s_cmp_gt_f32 against 0x80800000 (-2^-126), while
; GlobalISel negates first and then uses s_cmp_lt_f32 against 0x800000.
define amdgpu_cs float @srcmods_neg_f32(float inreg %src) {
; GFX12-SDAG-LABEL: srcmods_neg_f32:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_xor_b32 s1, s0, 0x80000000
; GFX12-SDAG-NEXT:    s_cmp_gt_f32 s0, 0x80800000
; GFX12-SDAG-NEXT:    s_cselect_b32 s0, 0x4f800000, 1.0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT:    s_mul_f32 s0, s1, s0
; GFX12-SDAG-NEXT:    s_cselect_b32 s1, 0x42000000, 0
; GFX12-SDAG-NEXT:    v_s_log_f32 s0, s0
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
; GFX12-SDAG-NEXT:    s_sub_f32 s0, s0, s1
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: srcmods_neg_f32:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_xor_b32 s0, s0, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    s_cmp_lt_f32 s0, 0x800000
; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 0x4f800000, 1.0
; GFX12-GISEL-NEXT:    s_mul_f32 s0, s0, s1
; GFX12-GISEL-NEXT:    s_cselect_b32 s1, 0x42000000, 0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX12-GISEL-NEXT:    v_s_log_f32 s0, s0
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_sub_f32 s0, s0, s1
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_2)
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %neg = fneg float %src
  %result = call float @llvm.log2.f32(float %neg)
  ret float %result
}

; llvm.fabs.f16 feeding llvm.log2.f16 on an inreg (SGPR) operand: the absolute
; value is expected to fold into the instruction as the |s0| source modifier of
; v_s_log_f16, with no separate scalar op.
define amdgpu_cs half @srcmods_abs_f16(half inreg %src) {
; GFX12-LABEL: srcmods_abs_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_log_f16 s0, |s0|
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %abs = call half @llvm.fabs.f16(half %src)
  %result = call half @llvm.log2.f16(half %abs)
  ret half %result
}

; fneg feeding llvm.log2.f16 on an inreg (SGPR) operand: the negation is
; expected to fold into the instruction as the -s0 source modifier of
; v_s_log_f16, with no separate scalar op.
define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
; GFX12-LABEL: srcmods_neg_f16:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_s_log_f16 s0, -s0
; GFX12-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    ; return to shader part epilog
  %neg = fneg half %src
  %result = call half @llvm.log2.f16(half %neg)
  ret half %result
}

; Declarations of the intrinsics called in this file: the generic exp2 / log2 /
; sqrt / fabs intrinsics and the llvm.amdgcn.* exp2 / log / rcp / sqrt target
; intrinsics, each in half and float variants.
declare half @llvm.exp2.f16(half)
declare float @llvm.exp2.f32(float)
declare half @llvm.amdgcn.exp2.f16(half)
declare float @llvm.amdgcn.exp2.f32(float)
declare half @llvm.log2.f16(half)
declare float @llvm.log2.f32(float)
declare half @llvm.amdgcn.log.f16(half)
declare float @llvm.amdgcn.log.f32(float)
declare half @llvm.amdgcn.rcp.f16(half)
declare float @llvm.amdgcn.rcp.f32(float)
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
declare half @llvm.amdgcn.sqrt.f16(half)
declare float @llvm.amdgcn.sqrt.f32(float)
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)