//===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file declares a class to represent arbitrary precision floating point /// values and provide a variety of arithmetic operations on them. /// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_APFLOAT_H #define LLVM_ADT_APFLOAT_H #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/float128.h" #include <memory> #define APFLOAT_DISPATCH_ON_SEMANTICS … namespace llvm { struct fltSemantics; class APSInt; class StringRef; class APFloat; class raw_ostream; template <typename T> class Expected; template <typename T> class SmallVectorImpl; /// Enum that represents what fraction of the LSB truncated bits of an fp number /// represent. /// /// This essentially combines the roles of guard and sticky bits. enum lostFraction { … }; /// A self-contained host- and target-independent arbitrary-precision /// floating-point software implementation. /// /// APFloat uses bignum integer arithmetic as provided by static functions in /// the APInt class. The library will work with bignum integers whose parts are /// any unsigned type at least 16 bits wide, but 64 bits is recommended. /// /// Written for clarity rather than speed, in particular with a view to use in /// the front-end of a cross compiler so that target arithmetic can be correctly /// performed on the host. Performance should nonetheless be reasonable, /// particularly for its intended use. 
It may be useful as a base /// implementation for a run-time library during development of a faster /// target-specific one. /// /// All 5 rounding modes in the IEEE-754R draft are handled correctly for all /// implemented operations. Currently implemented operations are add, subtract, /// multiply, divide, fused-multiply-add, conversion-to-float, /// conversion-to-integer and conversion-from-integer. New rounding modes /// (e.g. away from zero) can be added with three or four lines of code. /// /// Four formats are built-in: IEEE single precision, double precision, /// quadruple precision, and x87 80-bit extended double (when operating with /// full extended precision). Adding a new format that obeys IEEE semantics /// only requires adding two lines of code: a declaration and definition of the /// format. /// /// All operations return the status of that operation as an exception bit-mask, /// so multiple operations can be done consecutively with their results or-ed /// together. The returned status can be useful for compiler diagnostics; e.g., /// inexact, underflow and overflow can be easily diagnosed on constant folding, /// and compiler optimizers can determine what exceptions would be raised by /// folding operations and optimize, or perhaps not optimize, accordingly. /// /// At present, underflow tininess is detected after rounding; it should be /// straightforward to add support for the before-rounding case too. /// /// The library reads hexadecimal floating point numbers as per C99, and /// correctly rounds if necessary according to the specified rounding mode. /// Syntax is required to have been validated by the caller. It also converts /// floating point numbers to hexadecimal text as per the C99 %a and %A /// conversions. The output precision (or alternatively the natural minimal /// precision) can be specified; if the requested precision is less than the /// natural precision the output is correctly rounded for the specified rounding /// mode. 
/// /// It also reads decimal floating point numbers and correctly rounds according /// to the specified rounding mode. /// /// Conversion to decimal text is not currently implemented. /// /// Non-zero finite numbers are represented internally as a sign bit, a 16-bit /// signed exponent, and the significand as an array of integer parts. After /// normalization of a number of precision P the exponent is within the range of /// the format, and if the number is not denormal the P-th bit of the /// significand is set as an explicit integer bit. For denormals the most /// significant bit is shifted right so that the exponent is maintained at the /// format's minimum, so that the smallest denormal has just the least /// significant bit of the significand set. The sign of zeroes and infinities /// is significant; the exponent and significand of such numbers are not stored, /// but have a known implicit (deterministic) value: 0 for the significands, 0 /// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and /// significand are deterministic, although not really meaningful, and preserved /// in non-conversion operations. The exponent is implicitly all 1 bits. /// /// APFloat does not provide any exception handling beyond default exception /// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause /// by encoding Signaling NaNs with the first bit of their trailing significand as /// 0. /// /// TODO /// ==== /// /// Some features that may or may not be worth adding: /// /// Binary to decimal conversion (hard). /// /// Optional ability to detect underflow tininess before rounding. /// /// New formats: x87 in single and double precision mode (IEEE apart from /// extended exponent range) (hard). /// /// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward. /// // These are the common type definitions shared by APFloat and its internal // implementation classes. This struct should not define any non-static data // members. 
struct APFloatBase { … }; namespace detail { integerPart; uninitializedTag; roundingMode; opStatus; cmpResult; fltCategory; ExponentType; static constexpr uninitializedTag uninitialized = …; static constexpr roundingMode rmNearestTiesToEven = …; static constexpr roundingMode rmNearestTiesToAway = …; static constexpr roundingMode rmTowardNegative = …; static constexpr roundingMode rmTowardPositive = …; static constexpr roundingMode rmTowardZero = …; static constexpr unsigned integerPartWidth = …; static constexpr cmpResult cmpEqual = …; static constexpr cmpResult cmpLessThan = …; static constexpr cmpResult cmpGreaterThan = …; static constexpr cmpResult cmpUnordered = …; static constexpr opStatus opOK = …; static constexpr opStatus opInvalidOp = …; static constexpr opStatus opDivByZero = …; static constexpr opStatus opOverflow = …; static constexpr opStatus opUnderflow = …; static constexpr opStatus opInexact = …; static constexpr fltCategory fcInfinity = …; static constexpr fltCategory fcNaN = …; static constexpr fltCategory fcNormal = …; static constexpr fltCategory fcZero = …; class IEEEFloat final { … }; hash_code hash_value(const IEEEFloat &Arg); int ilogb(const IEEEFloat &Arg); IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode); IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM); // This mode implements more precise float in terms of two APFloats. // The interface and layout are designed for arbitrary underlying semantics, // though currently only PPCDoubleDouble semantics are supported, whose // corresponding underlying semantics are IEEEdouble. class DoubleAPFloat final { … }; hash_code hash_value(const DoubleAPFloat &Arg); DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, roundingMode RM); DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, roundingMode); } // End detail namespace // This is an interface class that is currently forwarding functionalities from // detail::IEEEFloat. 
class APFloat : public APFloatBase { … }; static_assert …; /// See friend declarations above. /// /// These additional declarations are required in order to compile LLVM with IBM /// xlC compiler. hash_code hash_value(const APFloat &Arg); inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) { … } /// Equivalent of C standard library function. /// /// While the C standard says Exp is an unspecified value for infinity and nan, /// this returns INT_MAX for infinities, and INT_MIN for NaNs. inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) { … } /// Returns the absolute value of the argument. inline APFloat abs(APFloat X) { … } /// Returns the negated value of the argument. inline APFloat neg(APFloat X) { … } /// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the /// 2 arguments if both are not NaN. If either argument is a NaN, returns the /// other argument. -0 is treated as ordered less than +0. LLVM_READONLY inline APFloat minnum(const APFloat &A, const APFloat &B) { … } /// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the /// 2 arguments if both are not NaN. If either argument is a NaN, returns the /// other argument. +0 is treated as ordered greater than -0. LLVM_READONLY inline APFloat maxnum(const APFloat &A, const APFloat &B) { … } /// Implements IEEE 754-2019 minimum semantics. Returns the smaller of 2 /// arguments, returning a quiet NaN if an argument is a NaN and treating -0 /// as less than +0. LLVM_READONLY inline APFloat minimum(const APFloat &A, const APFloat &B) { … } /// Implements IEEE 754-2019 minimumNumber semantics. Returns the smaller /// of 2 arguments, not propagating NaNs and treating -0 as less than +0. LLVM_READONLY inline APFloat minimumnum(const APFloat &A, const APFloat &B) { … } /// Implements IEEE 754-2019 maximum semantics. Returns the larger of 2 /// arguments, returning a quiet NaN if an argument is a NaN and treating -0 /// as less than +0. 
LLVM_READONLY inline APFloat maximum(const APFloat &A, const APFloat &B) { … } /// Implements IEEE 754-2019 maximumNumber semantics. Returns the larger /// of 2 arguments, not propagating NaNs and treating -0 as less than +0. LLVM_READONLY inline APFloat maximumnum(const APFloat &A, const APFloat &B) { … } inline raw_ostream &operator<<(raw_ostream &OS, const APFloat &V) { … } // We want the following functions to be available in the header for inlining. // We cannot define them inline in the class definition of `DoubleAPFloat` // because doing so would instantiate `std::unique_ptr<APFloat[]>` before // `APFloat` is defined, and that would be undefined behavior. namespace detail { DoubleAPFloat &DoubleAPFloat::operator=(DoubleAPFloat &&RHS) { … } APFloat &DoubleAPFloat::getFirst() { … } const APFloat &DoubleAPFloat::getFirst() const { … } APFloat &DoubleAPFloat::getSecond() { … } const APFloat &DoubleAPFloat::getSecond() const { … } } // namespace detail } // namespace llvm #undef APFLOAT_DISPATCH_ON_SEMANTICS #endif // LLVM_ADT_APFLOAT_H