; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -enable-ppc-gen-scalar-mass -verify-machineinstrs -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK-LNX %s
; RUN: llc -enable-ppc-gen-scalar-mass -verify-machineinstrs -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=CHECK-AIX %s
; Intrinsic declarations for the single- and double-precision pow calls
; exercised by the test functions in this file.
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)
; fast-math powf with 0.25
; Calls @llvm.pow.f32(%a, 0.25) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below:
;   - Linux LE (powerpc64le): no library call; an inline sequence with two
;     xsrsqrtesp-based stages, each finished by an fsel.
;   - AIX (powerpc-ibm-aix-xcoff): the pow call is kept and emitted as a
;     branch-and-link to .__xl_powf_finite[PR].
; TODO: pow->sqrt conversion for AIX
define float @llvmintr_powf_f32_fast025(float %a) #1 {
; CHECK-LNX-LABEL: llvmintr_powf_f32_fast025:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: xsrsqrtesp 2, 1
; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-LNX-NEXT: vspltisw 2, -3
; CHECK-LNX-NEXT: lfs 0, .LCPI0_0@toc@l(3)
; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-LNX-NEXT: xxlxor 5, 5, 5
; CHECK-LNX-NEXT: xsmulsp 3, 1, 2
; CHECK-LNX-NEXT: xsabsdp 1, 1
; CHECK-LNX-NEXT: xsmulsp 4, 3, 0
; CHECK-LNX-NEXT: xsmulsp 2, 3, 2
; CHECK-LNX-NEXT: xvcvsxwdp 3, 34
; CHECK-LNX-NEXT: xsaddsp 2, 2, 3
; CHECK-LNX-NEXT: xsmulsp 2, 4, 2
; CHECK-LNX-NEXT: lfs 4, .LCPI0_1@toc@l(3)
; CHECK-LNX-NEXT: xssubsp 1, 1, 4
; CHECK-LNX-NEXT: fsel 1, 1, 2, 5
; CHECK-LNX-NEXT: xsrsqrtesp 2, 1
; CHECK-LNX-NEXT: xsmulsp 6, 1, 2
; CHECK-LNX-NEXT: xsabsdp 1, 1
; CHECK-LNX-NEXT: xsmulsp 2, 6, 2
; CHECK-LNX-NEXT: xsmulsp 0, 6, 0
; CHECK-LNX-NEXT: xssubsp 1, 1, 4
; CHECK-LNX-NEXT: xsaddsp 2, 2, 3
; CHECK-LNX-NEXT: xsmulsp 0, 0, 2
; CHECK-LNX-NEXT: fsel 1, 1, 0, 5
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_powf_f32_fast025:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C0(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_powf_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz float @llvm.pow.f32(float %a, float 2.500000e-01)
ret float %call
}
; fast-math pow with 0.25
; Calls @llvm.pow.f64(%a, 0.25) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below:
;   - Linux LE (powerpc64le): no library call; two inline stages, each
;     guarded by an xstsqrtdp test that branches to either an xssqrtdp or an
;     xsrsqrtedp-based sequence.
;   - AIX (powerpc-ibm-aix-xcoff): the pow call is kept and emitted as a
;     branch-and-link to .__xl_pow_finite[PR].
; TODO: pow->sqrt conversion for AIX
define double @llvmintr_pow_f64_fast025(double %a) #1 {
; CHECK-LNX-LABEL: llvmintr_pow_f64_fast025:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: vspltisw 2, -3
; CHECK-LNX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
; CHECK-LNX-NEXT: xvcvsxwdp 2, 34
; CHECK-LNX-NEXT: lfs 0, .LCPI1_0@toc@l(3)
; CHECK-LNX-NEXT: bc 12, 2, .LBB1_3
; CHECK-LNX-NEXT: # %bb.1: # %entry
; CHECK-LNX-NEXT: xsrsqrtedp 3, 1
; CHECK-LNX-NEXT: xsmuldp 5, 1, 3
; CHECK-LNX-NEXT: xsmuldp 4, 3, 0
; CHECK-LNX-NEXT: xsmuldp 3, 5, 3
; CHECK-LNX-NEXT: xsadddp 3, 3, 2
; CHECK-LNX-NEXT: xsmuldp 3, 4, 3
; CHECK-LNX-NEXT: xsmuldp 1, 1, 3
; CHECK-LNX-NEXT: xsmuldp 3, 1, 3
; CHECK-LNX-NEXT: xsmuldp 1, 1, 0
; CHECK-LNX-NEXT: xsadddp 3, 3, 2
; CHECK-LNX-NEXT: xsmuldp 1, 1, 3
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: bc 4, 2, .LBB1_4
; CHECK-LNX-NEXT: .LBB1_2:
; CHECK-LNX-NEXT: xssqrtdp 1, 1
; CHECK-LNX-NEXT: blr
; CHECK-LNX-NEXT: .LBB1_3:
; CHECK-LNX-NEXT: xssqrtdp 1, 1
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: bc 12, 2, .LBB1_2
; CHECK-LNX-NEXT: .LBB1_4: # %entry
; CHECK-LNX-NEXT: xsrsqrtedp 3, 1
; CHECK-LNX-NEXT: xsmuldp 4, 1, 3
; CHECK-LNX-NEXT: xsmuldp 4, 4, 3
; CHECK-LNX-NEXT: xsmuldp 3, 3, 0
; CHECK-LNX-NEXT: xsadddp 4, 4, 2
; CHECK-LNX-NEXT: xsmuldp 3, 3, 4
; CHECK-LNX-NEXT: xsmuldp 1, 1, 3
; CHECK-LNX-NEXT: xsmuldp 3, 1, 3
; CHECK-LNX-NEXT: xsmuldp 0, 1, 0
; CHECK-LNX-NEXT: xsadddp 2, 3, 2
; CHECK-LNX-NEXT: xsmuldp 1, 0, 2
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_pow_f64_fast025:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C1(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_pow_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz double @llvm.pow.f64(double %a, double 2.500000e-01)
ret double %call
}
; fast-math powf with 0.75
; Calls @llvm.pow.f32(%a, 0.75) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below:
;   - Linux LE (powerpc64le): no library call; an inline sequence with two
;     xsrsqrtesp-based stages whose results are combined by a final xsmulsp.
;   - AIX (powerpc-ibm-aix-xcoff): the pow call is kept and emitted as a
;     branch-and-link to .__xl_powf_finite[PR].
; TODO: pow->sqrt conversion for AIX
define float @llvmintr_powf_f32_fast075(float %a) #1 {
; CHECK-LNX-LABEL: llvmintr_powf_f32_fast075:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: xsrsqrtesp 2, 1
; CHECK-LNX-NEXT: addis 3, 2, .LCPI2_0@toc@ha
; CHECK-LNX-NEXT: vspltisw 2, -3
; CHECK-LNX-NEXT: lfs 0, .LCPI2_0@toc@l(3)
; CHECK-LNX-NEXT: addis 3, 2, .LCPI2_1@toc@ha
; CHECK-LNX-NEXT: xxlxor 5, 5, 5
; CHECK-LNX-NEXT: xsmulsp 3, 1, 2
; CHECK-LNX-NEXT: xsabsdp 1, 1
; CHECK-LNX-NEXT: xsmulsp 4, 3, 0
; CHECK-LNX-NEXT: xsmulsp 2, 3, 2
; CHECK-LNX-NEXT: xvcvsxwdp 3, 34
; CHECK-LNX-NEXT: xsaddsp 2, 2, 3
; CHECK-LNX-NEXT: xsmulsp 2, 4, 2
; CHECK-LNX-NEXT: lfs 4, .LCPI2_1@toc@l(3)
; CHECK-LNX-NEXT: xssubsp 1, 1, 4
; CHECK-LNX-NEXT: fsel 1, 1, 2, 5
; CHECK-LNX-NEXT: xsrsqrtesp 2, 1
; CHECK-LNX-NEXT: xsmulsp 6, 1, 2
; CHECK-LNX-NEXT: xsmulsp 2, 6, 2
; CHECK-LNX-NEXT: xsmulsp 0, 6, 0
; CHECK-LNX-NEXT: xsaddsp 2, 2, 3
; CHECK-LNX-NEXT: xsmulsp 0, 0, 2
; CHECK-LNX-NEXT: xsabsdp 2, 1
; CHECK-LNX-NEXT: xssubsp 2, 2, 4
; CHECK-LNX-NEXT: fsel 0, 2, 0, 5
; CHECK-LNX-NEXT: xsmulsp 1, 1, 0
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_powf_f32_fast075:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C2(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_powf_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz float @llvm.pow.f32(float %a, float 7.500000e-01)
ret float %call
}
; fast-math pow with 0.75
; Calls @llvm.pow.f64(%a, 0.75) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below:
;   - Linux LE (powerpc64le): no library call; two inline stages, each
;     guarded by an xstsqrtdp test that selects either xssqrtdp or an
;     xsrsqrtedp-based sequence, with a final xsmuldp on both exit paths.
;   - AIX (powerpc-ibm-aix-xcoff): the pow call is kept and emitted as a
;     branch-and-link to .__xl_pow_finite[PR].
; TODO: pow->sqrt conversion for AIX
define double @llvmintr_pow_f64_fast075(double %a) #1 {
; CHECK-LNX-LABEL: llvmintr_pow_f64_fast075:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: vspltisw 2, -3
; CHECK-LNX-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-LNX-NEXT: xvcvsxwdp 2, 34
; CHECK-LNX-NEXT: lfs 0, .LCPI3_0@toc@l(3)
; CHECK-LNX-NEXT: bc 12, 2, .LBB3_3
; CHECK-LNX-NEXT: # %bb.1: # %entry
; CHECK-LNX-NEXT: xsrsqrtedp 3, 1
; CHECK-LNX-NEXT: xsmuldp 5, 1, 3
; CHECK-LNX-NEXT: xsmuldp 4, 3, 0
; CHECK-LNX-NEXT: xsmuldp 3, 5, 3
; CHECK-LNX-NEXT: xsadddp 3, 3, 2
; CHECK-LNX-NEXT: xsmuldp 3, 4, 3
; CHECK-LNX-NEXT: xsmuldp 1, 1, 3
; CHECK-LNX-NEXT: xsmuldp 3, 1, 3
; CHECK-LNX-NEXT: xsmuldp 1, 1, 0
; CHECK-LNX-NEXT: xsadddp 3, 3, 2
; CHECK-LNX-NEXT: xsmuldp 1, 1, 3
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: bc 4, 2, .LBB3_4
; CHECK-LNX-NEXT: .LBB3_2:
; CHECK-LNX-NEXT: xssqrtdp 0, 1
; CHECK-LNX-NEXT: xsmuldp 1, 1, 0
; CHECK-LNX-NEXT: blr
; CHECK-LNX-NEXT: .LBB3_3:
; CHECK-LNX-NEXT: xssqrtdp 1, 1
; CHECK-LNX-NEXT: xstsqrtdp 0, 1
; CHECK-LNX-NEXT: bc 12, 2, .LBB3_2
; CHECK-LNX-NEXT: .LBB3_4: # %entry
; CHECK-LNX-NEXT: xsrsqrtedp 3, 1
; CHECK-LNX-NEXT: xsmuldp 4, 1, 3
; CHECK-LNX-NEXT: xsmuldp 4, 4, 3
; CHECK-LNX-NEXT: xsmuldp 3, 3, 0
; CHECK-LNX-NEXT: xsadddp 4, 4, 2
; CHECK-LNX-NEXT: xsmuldp 3, 3, 4
; CHECK-LNX-NEXT: xsmuldp 4, 1, 3
; CHECK-LNX-NEXT: xsmuldp 3, 4, 3
; CHECK-LNX-NEXT: xsmuldp 0, 4, 0
; CHECK-LNX-NEXT: xsadddp 2, 3, 2
; CHECK-LNX-NEXT: xsmuldp 0, 0, 2
; CHECK-LNX-NEXT: xsmuldp 1, 1, 0
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_pow_f64_fast075:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C3(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_pow_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz double @llvm.pow.f64(double %a, double 7.500000e-01)
ret double %call
}
; fast-math powf with 0.50
; Calls @llvm.pow.f32(%a, 0.5) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below: on both targets the
; pow call is kept and lowered to a library call, with the exponent constant
; loaded into FPR 2 beforehand:
;   - Linux LE (powerpc64le): bl __xl_powf_finite
;   - AIX (powerpc-ibm-aix-xcoff): bl .__xl_powf_finite[PR]
; TODO: pow->sqrt conversion for LinuxLE and AIX
define float @llvmintr_powf_f32_fast050(float %a) #1 {
; CHECK-LNX-LABEL: llvmintr_powf_f32_fast050:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: mflr 0
; CHECK-LNX-NEXT: stdu 1, -32(1)
; CHECK-LNX-NEXT: std 0, 48(1)
; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
; CHECK-LNX-NEXT: .cfi_offset lr, 16
; CHECK-LNX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
; CHECK-LNX-NEXT: lfs 2, .LCPI4_0@toc@l(3)
; CHECK-LNX-NEXT: bl __xl_powf_finite
; CHECK-LNX-NEXT: nop
; CHECK-LNX-NEXT: addi 1, 1, 32
; CHECK-LNX-NEXT: ld 0, 16(1)
; CHECK-LNX-NEXT: mtlr 0
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_powf_f32_fast050:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C4(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_powf_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz float @llvm.pow.f32(float %a, float 5.000000e-01)
ret float %call
}
; fast-math pow with 0.50
; Calls @llvm.pow.f64(%a, 0.5) with the nnan/ninf/afn/nsz fast-math flags.
; Expected code per the autogenerated assertions below: on both targets the
; pow call is kept and lowered to a library call, with the exponent constant
; loaded into FPR 2 beforehand:
;   - Linux LE (powerpc64le): bl __xl_pow_finite
;   - AIX (powerpc-ibm-aix-xcoff): bl .__xl_pow_finite[PR]
; TODO: pow->sqrt conversion for LinuxLE and AIX
define double @llvmintr_pow_f64_fast050(double %a) #1 {
; CHECK-LNX-LABEL: llvmintr_pow_f64_fast050:
; CHECK-LNX: # %bb.0: # %entry
; CHECK-LNX-NEXT: mflr 0
; CHECK-LNX-NEXT: stdu 1, -32(1)
; CHECK-LNX-NEXT: std 0, 48(1)
; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
; CHECK-LNX-NEXT: .cfi_offset lr, 16
; CHECK-LNX-NEXT: addis 3, 2, .LCPI5_0@toc@ha
; CHECK-LNX-NEXT: lfs 2, .LCPI5_0@toc@l(3)
; CHECK-LNX-NEXT: bl __xl_pow_finite
; CHECK-LNX-NEXT: nop
; CHECK-LNX-NEXT: addi 1, 1, 32
; CHECK-LNX-NEXT: ld 0, 16(1)
; CHECK-LNX-NEXT: mtlr 0
; CHECK-LNX-NEXT: blr
;
; CHECK-AIX-LABEL: llvmintr_pow_f64_fast050:
; CHECK-AIX: # %bb.0: # %entry
; CHECK-AIX-NEXT: mflr 0
; CHECK-AIX-NEXT: stwu 1, -64(1)
; CHECK-AIX-NEXT: lwz 3, L..C5(2) # %const.0
; CHECK-AIX-NEXT: stw 0, 72(1)
; CHECK-AIX-NEXT: lfs 2, 0(3)
; CHECK-AIX-NEXT: bl .__xl_pow_finite[PR]
; CHECK-AIX-NEXT: nop
; CHECK-AIX-NEXT: addi 1, 1, 64
; CHECK-AIX-NEXT: lwz 0, 8(1)
; CHECK-AIX-NEXT: mtlr 0
; CHECK-AIX-NEXT: blr
entry:
; The per-call fast-math flags mirror the function-level attributes in #1.
%call = tail call nnan ninf afn nsz double @llvm.pow.f64(double %a, double 5.000000e-01)
ret double %call
}
; Function-level fast-math attribute group referenced as #1 by the test
; functions above; one string attribute per line for readability.
attributes #1 = {
  "no-infs-fp-math"="true"
  "no-nans-fp-math"="true"
  "no-signed-zeros-fp-math"="true"
  "approx-func-fp-math"="true"
}