//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // Copyright 2018 Ulf Adams // Copyright (c) Microsoft Corporation. All rights reserved. // Boost Software License - Version 1.0 - August 17th, 2003 // Permission is hereby granted, free of charge, to any person or organization // obtaining a copy of the software and accompanying documentation covered by // this license (the "Software") to use, reproduce, display, distribute, // execute, and transmit the Software, and to prepare derivative works of the // Software, and to permit third-parties to whom the Software is furnished to // do so, all subject to the following: // The copyright notices in the Software and this entire statement, including // the above license grant, this restriction and the following disclaimer, // must be included in all copies of the Software, in whole or in part, and // all derivative works of the Software, unless such copies or derivative // works are solely in the form of machine-executable object code generated by // a source language processor. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. 
// Avoid formatting to keep the changes with the original code minimal. // clang-format off #include <__assert> #include <__config> #include <charconv> #include "include/ryu/common.h" #include "include/ryu/d2fixed.h" #include "include/ryu/d2s.h" #include "include/ryu/d2s_full_table.h" #include "include/ryu/d2s_intrinsics.h" #include "include/ryu/digit_table.h" #include "include/ryu/ryu.h" _LIBCPP_BEGIN_NAMESPACE_STD // We need a 64x128-bit multiplication and a subsequent 128-bit shift. // Multiplication: // The 64-bit factor is variable and passed in, the 128-bit factor comes // from a lookup table. We know that the 64-bit factor only has 55 // significant bits (i.e., the 9 topmost bits are zeros). The 128-bit // factor only has 124 significant bits (i.e., the 4 topmost bits are // zeros). // Shift: // In principle, the multiplication result requires 55 + 124 = 179 bits to // represent. However, we then shift this value to the right by __j, which is // at least __j >= 115, so the result is guaranteed to fit into 179 - 115 = 64 // bits. This means that we only need the topmost 64 significant bits of // the 64x128-bit multiplication. // // There are several ways to do this: // 1. Best case: the compiler exposes a 128-bit type. // We perform two 64x64-bit multiplications, add the higher 64 bits of the // lower result to the higher result, and shift by __j - 64 bits. // // We explicitly cast from 64-bit to 128-bit, so the compiler can tell // that these are only 64-bit inputs, and can map these to the best // possible sequence of assembly instructions. // x64 machines happen to have matching assembly instructions for // 64x64-bit multiplications and 128-bit shifts. // // 2. Second best case: the compiler exposes intrinsics for the x64 assembly // instructions mentioned in 1. // // 3. We only have 64x64 bit instructions that return the lower 64 bits of // the result, i.e., we have to use plain C. 
//    Our inputs are less than the full width, so we have three options:
//    a. Ignore this fact and just implement the intrinsics manually.
//    b. Split both into 31-bit pieces, which guarantees no internal overflow,
//       but requires extra work upfront (unless we change the lookup table).
//    c. Split only the first factor into 31-bit pieces, which also guarantees
//       no internal overflow, but requires extra work since the intermediate
//       results are not perfectly aligned.

#ifdef _LIBCPP_INTRINSIC128

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShift(const uint64_t __m, const uint64_t* const __mul, const int32_t __j) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShiftAll(const uint64_t __m, const uint64_t* const __mul, const int32_t __j,
  uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) { … }

#else // ^^^ intrinsics available ^^^ / vvv intrinsics unavailable vvv

// Variant of __mulShiftAll compiled when _LIBCPP_INTRINSIC128 is not defined.
//
// Doubles __m, multiplies it by the two-word factor __mul[0] / __mul[1], and keeps the product as three
// 64-bit limbs (__lo, __mid, __hi). From that product it derives three shifted results:
//   return value : the product itself                         -> (2 * __m) * mul
//   *__vp        : the product plus one copy of the factor    -> (2 * __m + 1) * mul
//   *__vm        : if __mmShift == 1, the product minus one copy of the factor  -> (2 * __m - 1) * mul
//                  otherwise, twice the product minus one copy of the factor    -> (4 * __m - 1) * mul
//
// Parameters:
//   __m       : 64-bit factor (taken by value; modified locally).
//   __mul     : pointer to two uint64_t words; __mul[0] feeds the low limb, __mul[1] the upper limbs.
//   __j       : shift amount; every call below passes __j - 64 or __j - 64 - 1 as the shift distance.
//   __vp/__vm : output pointers, both always written.
//   __mmShift : selects which of the two *__vm computations is used.
//
// NOTE(review): __ryu_umul128 and __ryu_shiftright128 are defined outside this view. The comments below
// assume __ryu_umul128(a, b, &hi) returns the low 64 bits of a * b and stores the high 64 bits in hi, and
// that __ryu_shiftright128(lo, hi, dist) returns the low 64 bits of the 128-bit value hi:lo shifted right
// by dist -- confirm against their definitions.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __mulShiftAll(uint64_t __m, const uint64_t* const __mul, const int32_t __j,
  uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) { // TRANSITION, VSO-634761
  __m <<= 1;
  // __m is maximum 55 bits
  uint64_t __tmp;
  // Partial product with the first factor word; __tmp receives the part that belongs to the middle limb.
  const uint64_t __lo = __ryu_umul128(__m, __mul[0], &__tmp);
  uint64_t __hi;
  // Partial product with the second factor word, added onto the carried-over __tmp.
  const uint64_t __mid = __tmp + __ryu_umul128(__m, __mul[1], &__hi);
  __hi += __mid < __tmp; // overflow into __hi

  // Product + factor, limb by limb; each (x2 < x) comparison detects unsigned wrap-around, i.e. a carry.
  const uint64_t __lo2 = __lo + __mul[0];
  const uint64_t __mid2 = __mid + __mul[1] + (__lo2 < __lo);
  const uint64_t __hi2 = __hi + (__mid2 < __mid);
  // Only the middle and high limbs are passed on; the low limb is used solely for its carry.
  *__vp = __ryu_shiftright128(__mid2, __hi2, static_cast<uint32_t>(__j - 64 - 1));

  if (__mmShift == 1) {
    // Product - factor, limb by limb; each (x3 > x) comparison detects unsigned wrap-around, i.e. a borrow.
    const uint64_t __lo3 = __lo - __mul[0];
    const uint64_t __mid3 = __mid - __mul[1] - (__lo3 > __lo);
    const uint64_t __hi3 = __hi - (__mid3 > __mid);
    *__vm = __ryu_shiftright128(__mid3, __hi3, static_cast<uint32_t>(__j - 64 - 1));
  } else {
    // First double the three-limb product (with carries) ...
    const uint64_t __lo3 = __lo + __lo;
    const uint64_t __mid3 = __mid + __mid + (__lo3 < __lo);
    const uint64_t __hi3 = __hi + __hi + (__mid3 < __mid);
    // ... then subtract the factor once (with borrows).
    const uint64_t __lo4 = __lo3 - __mul[0];
    const uint64_t __mid4 = __mid3 - __mul[1] - (__lo4 > __lo3);
    const uint64_t __hi4 = __hi3 - (__mid4 > __mid3);
    // The value was doubled above, so the shift distance is one larger than in the other calls
    // (__j - 64 instead of __j - 64 - 1).
    *__vm = __ryu_shiftright128(__mid4, __hi4, static_cast<uint32_t>(__j - 64));
  }

  // The unmodified product, shifted by the same distance as *__vp.
  return __ryu_shiftright128(__mid, __hi, static_cast<uint32_t>(__j - 64 - 1));
}

#endif // ^^^ intrinsics unavailable ^^^

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __decimalLength17(const uint64_t __v) { … }

// A floating decimal representing m * 10^e.
struct __floating_decimal_64 { … };

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline __floating_decimal_64 __d2d(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result __to_chars(char* const _First, char* const _Last, const __floating_decimal_64 __v,
  chars_format _Fmt, const double __f) { … }

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __d2d_small_int(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent,
  __floating_decimal_64* const __v) { … }

[[nodiscard]] to_chars_result __d2s_buffered_n(char* const _First, char* const _Last, const double __f, const chars_format _Fmt) { … }

_LIBCPP_END_NAMESPACE_STD

// clang-format on