const debugFloat … type Float … type ErrNaN … func (err ErrNaN) Error() string { … } // NewFloat allocates and returns a new [Float] set to x, // with precision 53 and rounding mode [ToNearestEven]. // NewFloat panics with [ErrNaN] if x is a NaN. func NewFloat(x float64) *Float { … } const MaxExp … const MinExp … const MaxPrec … type form … const zero … const finite … const inf … type RoundingMode … const ToNearestEven … const ToNearestAway … const ToZero … const AwayFromZero … const ToNegativeInf … const ToPositiveInf … type Accuracy … const Below … const Exact … const Above … // SetPrec sets z's precision to prec and returns the (possibly) rounded // value of z. Rounding occurs according to z's rounding mode if the mantissa // cannot be represented in prec bits without loss of precision. // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. // If prec > [MaxPrec], it is set to [MaxPrec]. func (z *Float) SetPrec(prec uint) *Float { … } func makeAcc(above bool) Accuracy { … } // SetMode sets z's rounding mode to mode and returns an exact z. // z remains unchanged otherwise. // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to [Exact]. func (z *Float) SetMode(mode RoundingMode) *Float { … } // Prec returns the mantissa precision of x in bits. // The result may be 0 for |x| == 0 and |x| == Inf. func (x *Float) Prec() uint { … } // MinPrec returns the minimum precision required to represent x exactly // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). // The result is 0 for |x| == 0 and |x| == Inf. func (x *Float) MinPrec() uint { … } // Mode returns the rounding mode of x. func (x *Float) Mode() RoundingMode { … } // Acc returns the accuracy of x produced by the most recent // operation, unless explicitly documented otherwise by that // operation. func (x *Float) Acc() Accuracy { … } // Sign returns: // - -1 if x < 0; // - 0 if x is ±0; // - +1 if x > 0. 
func (x *Float) Sign() int { … } // MantExp breaks x into its mantissa and exponent components // and returns the exponent. If a non-nil mant argument is // provided its value is set to the mantissa of x, with the // same precision and rounding mode as x. The components // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. // Calling MantExp with a nil argument is an efficient way to // get the exponent of the receiver. // // Special cases are: // // ( ±0).MantExp(mant) = 0, with mant set to ±0 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf // // x and mant may be the same in which case x is set to its // mantissa value. func (x *Float) MantExp(mant *Float) (exp int) { … } func (z *Float) setExpAndRound(exp int64, sbit uint) { … } // SetMantExp sets z to mant × 2**exp and returns z. // The result z has the same precision and rounding mode // as mant. SetMantExp is an inverse of [Float.MantExp] but does // not require 0.5 <= |mant| < 1.0. Specifically, for a // given x of type *[Float], SetMantExp relates to [Float.MantExp] // as follows: // // mant := new(Float) // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 // // Special cases are: // // z.SetMantExp( ±0, exp) = ±0 // z.SetMantExp(±Inf, exp) = ±Inf // // z and mant may be the same in which case z's exponent // is set to exp. func (z *Float) SetMantExp(mant *Float, exp int) *Float { … } // Signbit reports whether x is negative or negative zero. func (x *Float) Signbit() bool { … } // IsInf reports whether x is +Inf or -Inf. func (x *Float) IsInf() bool { … } // IsInt reports whether x is an integer. // ±Inf values are not integers. func (x *Float) IsInt() bool { … } // debugging support func (x *Float) validate() { … } func (x *Float) validate0() string { … } // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. // sbit must be 0 or 1 and summarizes any "sticky bit" information one might // have before calling round. 
z's mantissa must be normalized (with the msb set) // or empty. // // CAUTION: The rounding modes [ToNegativeInf], [ToPositiveInf] are affected by the // sign of z. For correct rounding, the sign of z must be set correctly before // calling round. func (z *Float) round(sbit uint) { … } func (z *Float) setBits64(neg bool, x uint64) *Float { … } // SetUint64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 64 (and rounding will have // no effect). func (z *Float) SetUint64(x uint64) *Float { … } // SetInt64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 64 (and rounding will have // no effect). func (z *Float) SetInt64(x int64) *Float { … } // SetFloat64 sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to 53 (and rounding will have // no effect). SetFloat64 panics with [ErrNaN] if x is a NaN. func (z *Float) SetFloat64(x float64) *Float { … } // fnorm normalizes mantissa m by shifting it to the left // such that the msb of the most-significant word (msw) is 1. // It returns the shift amount. It assumes that len(m) != 0. func fnorm(m nat) int64 { … } // SetInt sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to the larger of x.BitLen() // or 64 (and rounding will have no effect). func (z *Float) SetInt(x *Int) *Float { … } // SetRat sets z to the (possibly rounded) value of x and returns z. // If z's precision is 0, it is changed to the largest of a.BitLen(), // b.BitLen(), or 64; with x = a/b. func (z *Float) SetRat(x *Rat) *Float { … } // SetInf sets z to the infinite Float -Inf if signbit is // set, or +Inf if signbit is not set, and returns z. The // precision of z is unchanged and the result is always // [Exact]. func (z *Float) SetInf(signbit bool) *Float { … } // Set sets z to the (possibly rounded) value of x and returns z. 
// If z's precision is 0, it is changed to the precision of x // before setting z (and rounding will have no effect). // Rounding is performed according to z's precision and rounding // mode; and z's accuracy reports the result error relative to the // exact (not rounded) result. func (z *Float) Set(x *Float) *Float { … } // Copy sets z to x, with the same precision, rounding mode, and accuracy as x. // Copy returns z. If x and z are identical, Copy is a no-op. func (z *Float) Copy(x *Float) *Float { … } // msb32 returns the 32 most significant bits of x. func msb32(x nat) uint32 { … } // msb64 returns the 64 most significant bits of x. func msb64(x nat) uint64 { … } // Uint64 returns the unsigned integer resulting from truncating x // towards zero. If 0 <= x <= [math.MaxUint64], the result is [Exact] // if x is an integer and [Below] otherwise. // The result is (0, [Above]) for x < 0, and ([math.MaxUint64], [Below]) // for x > [math.MaxUint64]. func (x *Float) Uint64() (uint64, Accuracy) { … } // Int64 returns the integer resulting from truncating x towards zero. // If [math.MinInt64] <= x <= [math.MaxInt64], the result is [Exact] if x is // an integer, and [Above] (x < 0) or [Below] (x > 0) otherwise. // The result is ([math.MinInt64], [Above]) for x < [math.MinInt64], // and ([math.MaxInt64], [Below]) for x > [math.MaxInt64]. func (x *Float) Int64() (int64, Accuracy) { … } // Float32 returns the float32 value nearest to x. If x is too small to be // represented by a float32 (|x| < [math.SmallestNonzeroFloat32]), the result // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x. // If x is too large to be represented by a float32 (|x| > [math.MaxFloat32]), // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x. func (x *Float) Float32() (float32, Accuracy) { … } // Float64 returns the float64 value nearest to x. 
If x is too small to be // represented by a float64 (|x| < [math.SmallestNonzeroFloat64]), the result // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x. // If x is too large to be represented by a float64 (|x| > [math.MaxFloat64]), // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x. func (x *Float) Float64() (float64, Accuracy) { … } // Int returns the result of truncating x towards zero; // or nil if x is an infinity. // The result is [Exact] if x.IsInt(); otherwise it is [Below] // for x > 0, and [Above] for x < 0. // If a non-nil *[Int] argument z is provided, [Float.Int] stores // the result in z instead of allocating a new [Int]. func (x *Float) Int(z *Int) (*Int, Accuracy) { … } // Rat returns the rational number corresponding to x; // or nil if x is an infinity. // The result is [Exact] if x is not an Inf. // If a non-nil *[Rat] argument z is provided, [Float.Rat] stores // the result in z instead of allocating a new [Rat]. func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { … } // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) // and returns z. func (z *Float) Abs(x *Float) *Float { … } // Neg sets z to the (possibly rounded) value of x with its sign negated, // and returns z. func (z *Float) Neg(x *Float) *Float { … } func validateBinaryOperands(x, y *Float) { … } // z = x + y, ignoring signs of x and y for the addition // but using the sign of z for rounding the result. // x and y must have a non-empty mantissa and valid exponent. func (z *Float) uadd(x, y *Float) { … } // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction // but using the sign of z for rounding the result. // x and y must have a non-empty mantissa and valid exponent. func (z *Float) usub(x, y *Float) { … } // z = x * y, ignoring signs of x and y for the multiplication // but using the sign of z for rounding the result. // x and y must have a non-empty mantissa and valid exponent. 
func (z *Float) umul(x, y *Float) { … } // z = x / y, ignoring signs of x and y for the division // but using the sign of z for rounding the result. // x and y must have a non-empty mantissa and valid exponent. func (z *Float) uquo(x, y *Float) { … } // ucmp returns -1, 0, or +1, depending on whether // |x| < |y|, |x| == |y|, or |x| > |y|. // x and y must have a non-empty mantissa and valid exponent. func (x *Float) ucmp(y *Float) int { … } // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, // it is changed to the larger of x's or y's precision before the operation. // Rounding is performed according to z's precision and rounding mode; and // z's accuracy reports the result error relative to the exact (not rounded) // result. Add panics with [ErrNaN] if x and y are infinities with opposite // signs. The value of z is undefined in that case. func (z *Float) Add(x, y *Float) *Float { … } // Sub sets z to the rounded difference x-y and returns z. // Precision, rounding, and accuracy reporting are as for [Float.Add]. // Sub panics with [ErrNaN] if x and y are infinities with equal // signs. The value of z is undefined in that case. func (z *Float) Sub(x, y *Float) *Float { … } // Mul sets z to the rounded product x*y and returns z. // Precision, rounding, and accuracy reporting are as for [Float.Add]. // Mul panics with [ErrNaN] if one operand is zero and the other // operand an infinity. The value of z is undefined in that case. func (z *Float) Mul(x, y *Float) *Float { … } // Quo sets z to the rounded quotient x/y and returns z. // Precision, rounding, and accuracy reporting are as for [Float.Add]. // Quo panics with [ErrNaN] if both operands are zero or both operands are infinities. // The value of z is undefined in that case. func (z *Float) Quo(x, y *Float) *Float { … } // Cmp compares x and y and returns: // - -1 if x < y; // - 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf); // - +1 if x > y. 
func (x *Float) Cmp(y *Float) int { … } // ord classifies x and returns: // // -2 if -Inf == x // -1 if -Inf < x < 0 // 0 if x == 0 (signed or unsigned) // +1 if 0 < x < +Inf // +2 if x == +Inf func (x *Float) ord() int { … } func umax32(x, y uint32) uint32 { … }