// d2s.cpp

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__assert>
#include <__config>
#include <charconv>

#include "include/ryu/common.h"
#include "include/ryu/d2fixed.h"
#include "include/ryu/d2s.h"
#include "include/ryu/d2s_full_table.h"
#include "include/ryu/d2s_intrinsics.h"
#include "include/ryu/digit_table.h"
#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
//   The 64-bit factor is variable and passed in, the 128-bit factor comes
//   from a lookup table. We know that the 64-bit factor only has 55
//   significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
//   factor only has 124 significant bits (i.e., the 4 topmost bits are
//   zeros).
// Shift:
//   In principle, the multiplication result requires 55 + 124 = 179 bits to
//   represent. However, we then shift this value to the right by __j, and
//   __j >= 115 always holds, so the result is guaranteed to fit into 179 - 115 = 64
//   bits. This means that we only need the topmost 64 significant bits of
//   the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
//    We perform two 64x64-bit multiplications, add the higher 64 bits of the
//    lower result to the higher result, and shift by __j - 64 bits.
//
//    We explicitly cast from 64-bit to 128-bit, so the compiler can tell
//    that these are only 64-bit inputs, and can map these to the best
//    possible sequence of assembly instructions.
//    x64 machines happen to have matching assembly instructions for
//    64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
//    instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
//    the result, i.e., we have to use plain C.
//    Our inputs are less than the full width, so we have three options:
//    a. Ignore this fact and just implement the intrinsics manually.
//    b. Split both into 31-bit pieces, which guarantees no internal overflow,
//       but requires extra work upfront (unless we change the lookup table).
//    c. Split only the first factor into 31-bit pieces, which also guarantees
//       no internal overflow, but requires extra work since the intermediate
//       results are not perfectly aligned.
#ifdef _LIBCPP_INTRINSIC128

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShift(const uint64_t __m, const uint64_t* const __mul, const int32_t __j) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShiftAll(const uint64_t __m, const uint64_t* const __mul, const int32_t __j,
  uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) { … }

#else // ^^^ intrinsics available ^^^ / vvv intrinsics unavailable vvv

// Portable fallback used when _LIBCPP_INTRINSIC128 is not defined.
//
// With M = 2 * __m and the 128-bit factor held as __mul[0] (low word) and
// __mul[1] (high word), this routine produces three shifted products at once:
//   * return value : taken from M * mul
//   * *__vp        : taken from M * mul + mul  (i.e. (M + 1) * mul)
//   * *__vm        : taken from M * mul - mul  (i.e. (M - 1) * mul) when
//                    __mmShift == 1; otherwise from 2 * (M * mul) - mul
//                    (i.e. (2 * M - 1) * mul), shifted by one additional bit.
// Only the upper two 64-bit limbs of each intermediate value are handed to
// __ryu_shiftright128; the lowest limb is used solely to derive the
// carry/borrow into the middle limb.
//
// NOTE(review): __ryu_umul128 is assumed to return the low 64 bits of a 64x64
// product and to store the high 64 bits through its third argument, and
// __ryu_shiftright128(lo, hi, dist) is assumed to shift the 128-bit value
// hi:lo right by dist; both live in d2s_intrinsics.h -- confirm there.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __mulShiftAll(uint64_t __m, const uint64_t* const __mul, const int32_t __j,
  uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) { // TRANSITION, VSO-634761
  // Shift distance shared by the returned value, *__vp and the
  // __mmShift == 1 flavour of *__vm. The extra "- 1" compensates for the
  // doubling of __m below; the "- 64" reflects that the lowest limb is dropped.
  const uint32_t __dist = static_cast<uint32_t>(__j - 64 - 1);

  // From here on __m holds twice the caller's value.
  // __m is maximum 55 bits
  __m <<= 1;

  // Three-limb product __m * mul: __prod0 (lowest), __prod1 (middle), __prod2 (highest).
  uint64_t __low_high_half;
  const uint64_t __prod0 = __ryu_umul128(__m, __mul[0], &__low_high_half);
  uint64_t __prod2;
  const uint64_t __prod1 = __low_high_half + __ryu_umul128(__m, __mul[1], &__prod2);
  if (__prod1 < __low_high_half) {
    ++__prod2; // the middle-limb addition wrapped, so carry into the top limb
  }

  // Upper value: product + mul, with carries rippled from limb to limb.
  const uint64_t __up0 = __prod0 + __mul[0];
  const uint64_t __up1 = __prod1 + __mul[1] + (__up0 < __prod0);
  const uint64_t __up2 = __prod2 + (__up1 < __prod1);
  *__vp = __ryu_shiftright128(__up1, __up2, __dist);

  // Lower value: either product - mul, or 2 * product - mul with one more bit of shift.
  if (__mmShift != 1) {
    // Double the product limb by limb first ...
    const uint64_t __dbl0 = __prod0 + __prod0;
    const uint64_t __dbl1 = __prod1 + __prod1 + (__dbl0 < __prod0);
    const uint64_t __dbl2 = __prod2 + __prod2 + (__dbl1 < __prod1);
    // ... then subtract mul, propagating borrows upward.
    const uint64_t __down0 = __dbl0 - __mul[0];
    const uint64_t __down1 = __dbl1 - __mul[1] - (__down0 > __dbl0);
    const uint64_t __down2 = __dbl2 - (__down1 > __dbl1);
    *__vm = __ryu_shiftright128(__down1, __down2, static_cast<uint32_t>(__j - 64));
  } else {
    // Subtract mul directly; a result larger than the minuend signals a borrow.
    const uint64_t __down0 = __prod0 - __mul[0];
    const uint64_t __down1 = __prod1 - __mul[1] - (__down0 > __prod0);
    const uint64_t __down2 = __prod2 - (__down1 > __prod1);
    *__vm = __ryu_shiftright128(__down1, __down2, __dist);
  }

  // The unmodified product, shifted, is the function result.
  return __ryu_shiftright128(__prod1, __prod2, __dist);
}

#endif // ^^^ intrinsics unavailable ^^^

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __decimalLength17(const uint64_t __v) { … }

// A floating decimal representing m * 10^e.
struct __floating_decimal_64 { … };

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline __floating_decimal_64 __d2d(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result __to_chars(char* const _First, char* const _Last, const __floating_decimal_64 __v,
  chars_format _Fmt, const double __f) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __d2d_small_int(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent,
  __floating_decimal_64* const __v) { … }

[[nodiscard]] to_chars_result __d2s_buffered_n(char* const _First, char* const _Last, const double __f,
  const chars_format _Fmt) { … }

_LIBCPP_END_NAMESPACE_STD

// clang-format on
// Source path: chromium/third_party/libc++/src/src/ryu/d2s.cpp