llvm/llvm/include/llvm/ADT/APFloat.h

//===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares a class to represent arbitrary precision floating point
/// values and provide a variety of arithmetic operations on them.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_APFLOAT_H
#define LLVM_ADT_APFLOAT_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/float128.h"
#include <memory>

#define APFLOAT_DISPATCH_ON_SEMANTICS

namespace llvm {

struct fltSemantics;
class APSInt;
class StringRef;
class APFloat;
class raw_ostream;

template <typename T> class Expected;
template <typename T> class SmallVectorImpl;

/// Enum that represents what fraction of the LSB truncated bits of an fp number
/// represent.
///
/// This essentially combines the roles of guard and sticky bits.
enum lostFraction {};

/// A self-contained host- and target-independent arbitrary-precision
/// floating-point software implementation.
///
/// APFloat uses bignum integer arithmetic as provided by static functions in
/// the APInt class.  The library will work with bignum integers whose parts are
/// any unsigned type at least 16 bits wide, but 64 bits is recommended.
///
/// Written for clarity rather than speed, in particular with a view to use in
/// the front-end of a cross compiler so that target arithmetic can be correctly
/// performed on the host.  Performance should nonetheless be reasonable,
/// particularly for its intended use.  It may be useful as a base
/// implementation for a run-time library during development of a faster
/// target-specific one.
///
/// All 5 rounding modes in the IEEE-754R draft are handled correctly for all
/// implemented operations.  Currently implemented operations are add, subtract,
/// multiply, divide, fused-multiply-add, conversion-to-float,
/// conversion-to-integer and conversion-from-integer.  New rounding modes
/// (e.g. away from zero) can be added with three or four lines of code.
///
/// Four formats are built-in: IEEE single precision, double precision,
/// quadruple precision, and x87 80-bit extended double (when operating with
/// full extended precision).  Adding a new format that obeys IEEE semantics
/// only requires adding two lines of code: a declaration and definition of the
/// format.
///
/// All operations return the status of that operation as an exception bit-mask,
/// so multiple operations can be done consecutively with their results or-ed
/// together.  The returned status can be useful for compiler diagnostics; e.g.,
/// inexact, underflow and overflow can be easily diagnosed on constant folding,
/// and compiler optimizers can determine what exceptions would be raised by
/// folding operations and optimize, or perhaps not optimize, accordingly.
///
/// At present, underflow tininess is detected after rounding; it should be
/// straight forward to add support for the before-rounding case too.
///
/// The library reads hexadecimal floating point numbers as per C99, and
/// correctly rounds if necessary according to the specified rounding mode.
/// Syntax is required to have been validated by the caller.  It also converts
/// floating point numbers to hexadecimal text as per the C99 %a and %A
/// conversions.  The output precision (or alternatively the natural minimal
/// precision) can be specified; if the requested precision is less than the
/// natural precision the output is correctly rounded for the specified rounding
/// mode.
///
/// It also reads decimal floating point numbers and correctly rounds according
/// to the specified rounding mode.
///
/// Conversion to decimal text is not currently implemented.
///
/// Non-zero finite numbers are represented internally as a sign bit, a 16-bit
/// signed exponent, and the significand as an array of integer parts.  After
/// normalization of a number of precision P the exponent is within the range of
/// the format, and if the number is not denormal the P-th bit of the
/// significand is set as an explicit integer bit.  For denormals the most
/// significant bit is shifted right so that the exponent is maintained at the
/// format's minimum, so that the smallest denormal has just the least
/// significant bit of the significand set.  The sign of zeroes and infinities
/// is significant; the exponent and significand of such numbers is not stored,
/// but has a known implicit (deterministic) value: 0 for the significands, 0
/// for zero exponent, all 1 bits for infinity exponent.  For NaNs the sign and
/// significand are deterministic, although not really meaningful, and preserved
/// in non-conversion operations.  The exponent is implicitly all 1 bits.
///
/// APFloat does not provide any exception handling beyond default exception
/// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause
/// by encoding Signaling NaNs with the first bit of its trailing significand as
/// 0.
///
/// TODO
/// ====
///
/// Some features that may or may not be worth adding:
///
/// Binary to decimal conversion (hard).
///
/// Optional ability to detect underflow tininess before rounding.
///
/// New formats: x87 in single and double precision mode (IEEE apart from
/// extended exponent range) (hard).
///
/// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward.
///

// This is the common type definitions shared by APFloat and its internal
// implementation classes. This struct should not define any non-static data
// members.
struct APFloatBase {};

namespace detail {

class IEEEFloat final : public APFloatBase {};

hash_code hash_value(const IEEEFloat &Arg);
int ilogb(const IEEEFloat &Arg);
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode);
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM);

// This mode implements more precise float in terms of two APFloats.
// The interface and layout is designed for arbitrary underlying semantics,
// though currently only PPCDoubleDouble semantics are supported, whose
// corresponding underlying semantics are IEEEdouble.
class DoubleAPFloat final : public APFloatBase {};

hash_code hash_value(const DoubleAPFloat &Arg);
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp, IEEEFloat::roundingMode RM);
DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, IEEEFloat::roundingMode);

} // End detail namespace

// This is a interface class that is currently forwarding functionalities from
// detail::IEEEFloat.
class APFloat : public APFloatBase {};

/// See friend declarations above.
///
/// These additional declarations are required in order to compile LLVM with IBM
/// xlC compiler.
hash_code hash_value(const APFloat &Arg);
inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {}

/// Equivalent of C standard library function.
///
/// While the C standard says Exp is an unspecified value for infinity and nan,
/// this returns INT_MAX for infinities, and INT_MIN for NaNs.
inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) {}
/// Returns the absolute value of the argument.
inline APFloat abs(APFloat X) {}

/// Returns the negated value of the argument.
inline APFloat neg(APFloat X) {}

/// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the
/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
/// other argument. -0 is treated as ordered less than +0.
LLVM_READONLY
inline APFloat minnum(const APFloat &A, const APFloat &B) {}

/// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the
/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
/// other argument. +0 is treated as ordered greater than -0.
LLVM_READONLY
inline APFloat maxnum(const APFloat &A, const APFloat &B) {}

/// Implements IEEE 754-2019 minimum semantics. Returns the smaller of 2
/// arguments, propagating NaNs and treating -0 as less than +0.
LLVM_READONLY
inline APFloat minimum(const APFloat &A, const APFloat &B) {}

/// Implements IEEE 754-2019 minimumNumber semantics. Returns the smaller
/// of 2 arguments, not propagating NaNs and treating -0 as less than +0.
LLVM_READONLY
inline APFloat minimumnum(const APFloat &A, const APFloat &B) {}

/// Implements IEEE 754-2019 maximum semantics. Returns the larger of 2
/// arguments, propagating NaNs and treating -0 as less than +0.
LLVM_READONLY
inline APFloat maximum(const APFloat &A, const APFloat &B) {}

/// Implements IEEE 754-2019 maximumNumber semantics. Returns the larger
/// of 2 arguments, not propagating NaNs and treating -0 as less than +0.
LLVM_READONLY
inline APFloat maximumnum(const APFloat &A, const APFloat &B) {}

// We want the following functions to be available in the header for inlining.
// We cannot define them inline in the class definition of `DoubleAPFloat`
// because doing so would instantiate `std::unique_ptr<APFloat[]>` before
// `APFloat` is defined, and that would be undefined behavior.
namespace detail {

DoubleAPFloat &DoubleAPFloat::operator=(DoubleAPFloat &&RHS) {}

APFloat &DoubleAPFloat::getFirst() {}
const APFloat &DoubleAPFloat::getFirst() const {}
APFloat &DoubleAPFloat::getSecond() {}
const APFloat &DoubleAPFloat::getSecond() const {}

} // namespace detail

} // namespace llvm

#undef APFLOAT_DISPATCH_ON_SEMANTICS
#endif // LLVM_ADT_APFLOAT_H