hex_float.h | Explore in Territory

// Copyright (c) 2015-2016 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SOURCE_UTIL_HEX_FLOAT_H_
#define SOURCE_UTIL_HEX_FLOAT_H_

#include <cassert>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <iomanip>
#include <limits>
#include <sstream>
#include <vector>

#include "source/util/bitutils.h"

// GCC_VERSION is defined in both branches so that it can always be tested
// with #if, as the `GCC_VERSION == 40801` check inside HexFloat does.
#ifndef __GNUC__
#define GCC_VERSION …
#else
#define GCC_VERSION …
#endif

namespace spvtools {
namespace utils {

// Type used to instantiate the helpers in this file for IEEE half (16-bit)
// values; see FloatProxyTraits<Float16> and
// HexFloatTraits<FloatProxy<Float16>>.
class Float16 { … };

// To specialize this type, you must define uint_type as an unsigned
// integer type that can fit your floating point type.
// You must also add an isNan function that returns true if
// a value is NaN.
template <typename T>
struct FloatProxyTraits { … };

// FloatProxyTraits specialization for float.
template <>
struct FloatProxyTraits<float> { … };

// FloatProxyTraits specialization for double.
template <>
struct FloatProxyTraits<double> { … };

// FloatProxyTraits specialization for Float16.
template <>
struct FloatProxyTraits<Float16> { … };

// Copying a floating point number (especially if it is NaN) does not
// guarantee that its bits are preserved. This class lets us store a value of
// floating point type T and use it as a float only when necessary.
template <typename T>
class FloatProxy { … };

// Equality comparison between two FloatProxy values of the same type T.
template <typename T>
bool operator==(const FloatProxy<T>& first, const FloatProxy<T>& second) { … }

// Reads a FloatProxy value as a normal float from a stream.
// The parsed result is stored into 'value'.
template <typename T>
std::istream& operator>>(std::istream& is, FloatProxy<T>& value) { … }

// This is an example traits type. It is not meant to be used in practice, but
// will be the default for any non-specialized type.
template <typename T>
struct HexFloatTraits { … };

// Traits for IEEE float.
// 1 sign bit, 8 exponent bits, 23 fractional bits (32 bits in total).
template <>
struct HexFloatTraits<FloatProxy<float>> { … };

// Traits for IEEE double.
// 1 sign bit, 11 exponent bits, 52 fractional bits (64 bits in total).
template <>
struct HexFloatTraits<FloatProxy<double>> { … };

// Traits for IEEE half.
// 1 sign bit, 5 exponent bits, 10 fractional bits (16 bits in total).
template <>
struct HexFloatTraits<FloatProxy<Float16>> { … };

// Names a direction in which a value may be rounded.
enum class round_direction { … };

// Template class that houses a floating-point number.
// It exposes a number of constants based on the provided traits to
// assist in interpreting the bits of the value.
template <typename T, typename Traits = HexFloatTraits<T>>
class HexFloat {
 public:
  // Type aliases forwarded from Traits so that users of HexFloat do not need
  // to name the traits type themselves.
  using uint_type = typename Traits::uint_type;
  using int_type = typename Traits::int_type;
  using underlying_type = typename Traits::underlying_type;
  using native_type = typename Traits::native_type;

  // Constructs a HexFloat from the value f. The constructor is explicit, so a
  // T is never implicitly converted to a HexFloat.
  explicit HexFloat(T f) : … { … }

  // Returns the value as a T.
  T value() const { … }
  // Sets the value to f.
  void set_value(T f) { … }

  // These are all written like this because it is convenient to have
  // compile-time constants for all of these values.
  //
  // NOTE(review): the masks below assume that SetBits<T, first, count>::get
  // (from bitutils.h) is a mask of 'count' set bits starting at bit 'first'.
  // Confirm against bitutils.h.

  // Pass-through values to save typing.
  static const uint32_t num_used_bits = Traits::num_used_bits;
  static const uint32_t exponent_bias = Traits::exponent_bias;
  static const uint32_t num_exponent_bits = Traits::num_exponent_bits;
  static const uint32_t num_fraction_bits = Traits::num_fraction_bits;

  // Number of bits to shift left to set the highest relevant bit.
  // This is the position of the sign bit (see sign_mask).
  static const uint32_t top_bit_left_shift = num_used_bits - 1;
  // How many nibbles (hex characters) the fractional part takes up,
  // rounded up to a whole nibble.
  static const uint32_t fraction_nibbles = (num_fraction_bits + 3) / 4;
  // If the fractional part does not fit evenly into a hex character (4-bits)
  // then we have to left-shift to get rid of leading 0s. This is the amount
  // we have to shift (might be 0). For example, 23 fraction bits need 6
  // nibbles (24 bits), which leaves 1 overflow bit.
  static const uint32_t num_overflow_bits =
      fraction_nibbles * 4 - num_fraction_bits;

  // The representation of the fraction, not the actual bits. It spans
  // num_fraction_bits + num_overflow_bits bits, i.e. the nibble-aligned width
  // of the fraction. When num_overflow_bits is non-zero this includes the
  // leading bit that is usually implicit.
  static const uint_type fraction_represent_mask =
      SetBits<uint_type, 0, num_fraction_bits + num_overflow_bits>::get;

  // The topmost bit in the nibble-aligned fraction.
  static const uint_type fraction_top_bit =
      uint_type(1) << (num_fraction_bits + num_overflow_bits - 1);

  // The least significant bit in the exponent, which is also the bit
  // immediately to the left of the significand.
  static const uint_type first_exponent_bit = uint_type(1)
                                              << (num_fraction_bits);

  // The mask for the encoded fraction. It does not include the
  // implicit bit.
  static const uint_type fraction_encode_mask =
      SetBits<uint_type, 0, num_fraction_bits>::get;

  // The bit that is used as a sign.
  static const uint_type sign_mask = uint_type(1) << top_bit_left_shift;

  // The bits that represent the exponent. They sit directly above the
  // fraction bits.
  static const uint_type exponent_mask =
      SetBits<uint_type, num_fraction_bits, num_exponent_bits>::get;

  // How far left the exponent is shifted.
  static const uint32_t exponent_left_shift = num_fraction_bits;

  // How far from the right edge the fraction is shifted. This is the number
  // of bits in uint_type that are not fraction bits.
  static const uint32_t fraction_right_shift =
      static_cast<uint32_t>(sizeof(uint_type) * 8) - num_fraction_bits;

  // The maximum representable unbiased exponent: the exponent field with all
  // of its bits set, minus the bias.
  static const int_type max_exponent =
      (exponent_mask >> num_fraction_bits) - exponent_bias;
  // The minimum representable exponent for normalized numbers.
  static const int_type min_exponent = -static_cast<int_type>(exponent_bias);

  // Returns the bits associated with the value, as a uint_type.
  uint_type getBits() const { … }

  // Returns the bits associated with the value, with the leading sign bit
  // excluded.
  uint_type getUnsignedBits() const { … }

  // Returns the bits associated with the exponent, shifted to start at the
  // lsb of the type. This is the raw exponent field; see
  // getUnbiasedExponent() for the unbiased form.
  // NOTE(review): the top-level const on a by-value return has no effect if
  // uint_type is a scalar type.
  const uint_type getExponentBits() const { … }

  // Returns the exponent in unbiased form. This is the exponent in the
  // human-friendly form, and it is signed (int_type).
  // NOTE(review): the top-level const on a by-value return has no effect if
  // int_type is a scalar type.
  const int_type getUnbiasedExponent() const { … }

  // Returns just the significand bits from the value.
  // NOTE(review): the top-level const on a by-value return has no effect if
  // uint_type is a scalar type.
  const uint_type getSignificandBits() const { … }

  // If the number is normalized, returns the unbiased exponent.
  // If the number is denormal, returns the unbiased exponent it would have
  // after being normalized.
  const int_type getUnbiasedNormalizedExponent() const { … }

  // Returns the significand after it has been normalized.
  const uint_type getNormalizedSignificand() const { … }

  // Returns true if this number represents a negative value.
  // See sign_mask for the bit that is used as the sign.
  bool isNegative() const { … }

  // Sets this HexFloat from the individual components.
  // Note this assumes EVERY significand is normalized, and has an implicit
  // leading one. This means that the only way that this method will set 0,
  // is if you set a number so denormalized that it underflows.
  // Do not use this method with raw bits extracted from a subnormal number,
  // since subnormals do not have an implicit leading 1 in the significand.
  // The significand is also expected to be in the
  // lowest-most num_fraction_bits of the uint_type.
  // The exponent is expected to be unbiased, meaning an exponent of
  // 0 actually means 0.
  // If round_denorm_up is set, then on underflow, if a number is non-0
  // and would underflow, we round up to the smallest denorm.
  void setFromSignUnbiasedExponentAndNormalizedSignificand(
      bool negative, int_type exponent, uint_type significand,
      bool round_denorm_up) { … }

  // Returns the given significand incremented by to_increment.
  // If this would spill the significand into the implicit bit,
  // *carry is set to true and the significand is shifted to fit into
  // the correct location, otherwise *carry is set to false.
  // The significand and to_increment are assumed to be within the bounds
  // for a valid significand.
  // This is a static function: it works only on its arguments and does not
  // touch any HexFloat object.
  static uint_type incrementSignificand(uint_type significand,
                                        uint_type to_increment, bool* carry) { … }

#if GCC_VERSION == 40801
  // These exist because MSVC throws warnings on negative right-shifts
  // even if they are not going to be executed. Eg:
  // constant_number < 0? 0: constant_number
  // These convert the negative left-shifts into right shifts.
  // Shifts a value left by the compile-time amount N, where N may be
  // negative. A positive N shifts left by N bits. A zero or negative N shifts
  // right by -N bits, so a "negative left shift" becomes the equivalent right
  // shift.
  template <int_type N>
  struct negatable_left_shift {
    // Returns val shifted as described above, converted back to uint_type.
    static uint_type val(uint_type val) {
      // Shifting by a negative count is undefined behavior, so always shift by
      // the magnitude of N. Using the magnitude in both branches also keeps
      // the branch that is not taken free of negative shift counts.
      const int_type shift = N > 0 ? N : -N;
      if (N > 0) {
        return static_cast<uint_type>(val << shift);
      }
      return static_cast<uint_type>(val >> shift);
    }
  };

  template <int_type N>
  struct negatable_right_shift {
    static uint_type val(uint_type val) {
      if (N > 0) { … };

// Returns the 4 bits (one nibble) represented by the given hex character.
// NOTE(review): the result for a character that is not a hex digit is not
// documented here; check the body before relying on it.
inline uint8_t get_nibble_from_character(int character) { … }

// Outputs the given HexFloat to the stream.
// NOTE(review): presumably written in hexadecimal floating point notation
// (for example 0x1.8p+1) -- confirm against the body.
template <typename T, typename Traits>
std::ostream& operator<<(std::ostream& os, const HexFloat<T, Traits>& value) { … }

// Returns true if negate_value is true and the next character on the
// input stream is a plus or minus sign.  In that case we also set the fail bit
// on the stream and set the value to the zero value for its type.
// This is for callers that have already consumed a sign themselves
// (negate_value is true), where a second sign in the input would be an error.
template <typename T, typename Traits>
inline bool RejectParseDueToLeadingSign(std::istream& is, bool negate_value,
                                        HexFloat<T, Traits>& value) { … }

// Parses a floating point number in the usual (non-hex) notation from the
// given stream and stores it into the value parameter.
// If negate_value is true then the number may not have a leading minus or
// plus, and if it successfully parses, then the number is negated before
// being stored into the value parameter.
// If the value cannot be correctly parsed or overflows the target floating
// point type, then set the fail bit on the stream.
// TODO(dneto): Promise C++11 standard behavior in how the value is set in
// the error case, but only after all target platforms implement it correctly.
// In particular, the Microsoft C++ runtime appears to be out of spec.
template <typename T, typename Traits>
inline std::istream& ParseNormalFloat(std::istream& is, bool negate_value,
                                      HexFloat<T, Traits>& value) { … }

// Specialization of ParseNormalFloat for FloatProxy<Float16> values.
// This will parse the float as if it were a 32-bit floating point number,
// and then round it to fit into a Float16 value.
// The number is rounded towards zero.
// If negate_value is true then the number may not have a leading minus or
// plus, and if it successfully parses, then the number is negated before
// being stored into the value parameter.
// If the value cannot be correctly parsed or overflows the target floating
// point type, then set the fail bit on the stream.
// TODO(dneto): Promise C++11 standard behavior in how the value is set in
// the error case, but only after all target platforms implement it correctly.
// In particular, the Microsoft C++ runtime appears to be out of spec.
template <>
inline std::istream&
ParseNormalFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>(
    std::istream& is, bool negate_value,
    HexFloat<FloatProxy<Float16>, HexFloatTraits<FloatProxy<Float16>>>& value) { … }

// Small integer helpers used by the parsing code in this file.
namespace detail {

// Returns a new value formed from 'value' by setting 'bit' that is the
// 'n'th most significant bit (where 0 is the most significant bit).
// If 'bit' is zero or 'n' is more than the number of bits in the integer
// type, then return the original value.
template <typename UINT_TYPE>
UINT_TYPE set_nth_most_significant_bit(UINT_TYPE value, UINT_TYPE bit,
                                       UINT_TYPE n) { … }

// Attempts to increment the argument.
// If it does not overflow, then increments the argument and returns true.
// If it would overflow, returns false.
template <typename INT_TYPE>
bool saturated_inc(INT_TYPE& value) { … }

// Attempts to decrement the argument.
// If it does not underflow, then decrements the argument and returns true.
// If it would underflow, returns false.
template <typename INT_TYPE>
bool saturated_dec(INT_TYPE& value) { … }
}  // namespace detail

// Reads a HexFloat from the given stream.
// If the float is not encoded as a hex-float then it will be parsed
// as a regular float.
// This may fail if your stream does not support at least one unget.
// NaN values can be encoded with "0x1.<not zero>p+exponent_bias".
// This would normally overflow a float and round to
// infinity but this special pattern is the exact representation for a NaN,
// and therefore is actually encoded as the correct NaN. To encode inf,
// either 0x0p+exponent_bias can be specified or any exponent greater than
// exponent_bias.
// NOTE(review): the 32-bit examples below use an exponent of 128, which is
// one more than the usual IEEE bias of 127, so "exponent_bias" in the text
// above may really mean the all-ones exponent (max_exponent) -- confirm.
// Examples using IEEE 32-bit float encoding.
//    0x1.0p+128 (+inf)
//    -0x1.0p+128 (-inf)
//
//    0x1.1p+128 (+NaN)
//    -0x1.1p+128 (-NaN)
//
//    0x1p+129 (+inf)
//    -0x1p+129 (-inf)
template <typename T, typename Traits>
std::istream& operator>>(std::istream& is, HexFloat<T, Traits>& value) { … }

// Writes a FloatProxy value to a stream.
// Zero and normal numbers are printed in the usual decimal notation, but with
// enough digits to fully reproduce the value.  Other values (subnormal,
// NaN, and infinity) are printed as a hex float.
template <typename T>
std::ostream& operator<<(std::ostream& os, const FloatProxy<T>& value) { … }

// Specialization of the FloatProxy stream output operator for Float16
// values.
template <>
inline std::ostream& operator<<<Float16>(std::ostream& os,
                                         const FloatProxy<Float16>& value) { … }

}  // namespace utils
}  // namespace spvtools

#endif  // SOURCE_UTIL_HEX_FLOAT_H_
chromium/third_party/spirv-tools/src/source/util/hex_float.h