// Copyright 2021 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // FFT-based multiplication, due to Schönhage and Strassen. // This implementation mostly follows the description given in: // Christoph Lüders: Fast Multiplication of Large Integers, // http://arxiv.org/abs/1503.04955 #include "src/bigint/bigint-internal.h" #include "src/bigint/digit-arithmetic.h" #include "src/bigint/util.h" namespace v8 { namespace bigint { namespace { //////////////////////////////////////////////////////////////////////////////// // Part 1: Functions for "mod F_n" arithmetic. // F_n is of the shape 2^K + 1, and for convenience we use K to count the // number of digits rather than the number of bits, so F_n (or K) are implicit // and deduced from the length {len} of the digits array. // Helper function for {ModFn} below. void ModFn_Helper(digit_t* x, int len, signed_digit_t high) { … } // {x} := {x} mod F_n, assuming that {x} is "slightly" larger than F_n (e.g. // after addition of two numbers that were mod-F_n-normalized before). void ModFn(digit_t* x, int len) { … } // {dest} := {src} mod F_n, assuming that {src} is about twice as long as F_n // (e.g. after multiplication of two numbers that were mod-F_n-normalized // before). // {len} is length of {dest}; {src} is twice as long. void ModFnDoubleWidth(digit_t* dest, const digit_t* src, int len) { … } // Sets {sum} := {a} + {b} and {diff} := {a} - {b}, which is more efficient // than computing sum and difference separately. Applies "mod F_n" normalization // to both results. void SumDiff(digit_t* sum, digit_t* diff, const digit_t* a, const digit_t* b, int len) { … } // {result} := ({input} << shift) mod F_n, where shift >= K. void ShiftModFn_Large(digit_t* result, const digit_t* input, int digit_shift, int bits_shift, int K) { … } // Sets {result} := {input} * 2^{power_of_two} mod 2^{K} + 1. // This function is highly relevant for overall performance. void ShiftModFn(digit_t* result, const digit_t* input, int power_of_two, int K, int zero_above = 0x7FFFFFFF) { … } //////////////////////////////////////////////////////////////////////////////// // Part 2: FFT-based multiplication is very sensitive to appropriate choice // of parameters. The following functions choose the parameters that the // subsequent actual computation will use. This is partially based on formal // constraints and partially on experimentally-determined heuristics. struct Parameters { … }; // Computes parameters for the main calculation, given a bit length {N} and // an {m}. See the paper for details. void ComputeParameters(int N, int m, Parameters* params) { … } // Computes parameters for recursive invocations ("inner layer"). void ComputeParameters_Inner(int N, Parameters* params) { … } int PredictInnerK(int N) { … } // Applies heuristics to decide whether {m} should be decremented, by looking // at what would happen to {K} and {s} if {m} was decremented. bool ShouldDecrementM(const Parameters& current, const Parameters& next, const Parameters& after_next) { … } // Decides what parameters to use for a given input bit length {N}. // Returns the chosen m. int GetParameters(int N, Parameters* params) { … } //////////////////////////////////////////////////////////////////////////////// // Part 3: Fast Fourier Transformation. class FFTContainer { … }; inline void CopyAndZeroExtend(digit_t* dst, const digit_t* src, int digits_to_copy, size_t total_bytes) { … } // Reads {X} into the FFTContainer's internal storage, dividing it into chunks // while doing so; then performs the forward FFT. void FFTContainer::Start_Default(Digits X, int chunk_size, int theta, int omega) { … } // This version of Start is optimized for the case where ~half of the // container will be filled with padding zeros. void FFTContainer::Start(Digits X, int chunk_size, int theta, int omega) { … } // Forward transformation. // We use the "DIF" aka "decimation in frequency" transform, because it // leaves the result in "bit reversed" order, which is precisely what we // need as input for the "DIT" aka "decimation in time" backwards transform. void FFTContainer::FFT_ReturnShuffledThreadsafe(int start, int len, int omega, digit_t* temp) { … } // Recursive step of the above, factored out for additional callers. void FFTContainer::FFT_Recurse(int start, int half, int omega, digit_t* temp) { … } // Backward transformation. // We use the "DIT" aka "decimation in time" transform here, because it // turns bit-reversed input into normally sorted output. void FFTContainer::BackwardFFT(int start, int len, int omega) { … } void FFTContainer::BackwardFFT_Threadsafe(int start, int len, int omega, digit_t* temp) { … } // Recombines the result's parts into {Z}, after backwards FFT. void FFTContainer::NormalizeAndRecombine(int omega, int m, RWDigits Z, int chunk_size) { … } // Helper function for {CounterWeightAndRecombine} below. bool ShouldBeNegative(const digit_t* x, int xlen, digit_t threshold, int s) { … } // Same as {NormalizeAndRecombine} above, but for the needs of the recursive // invocation ("inner layer") of FFT multiplication, where an additional // counter-weighting step is required. void FFTContainer::CounterWeightAndRecombine(int theta, int m, RWDigits Z, int s) { … } // Main FFT function for recursive invocations ("inner layer"). void MultiplyFFT_Inner(RWDigits Z, Digits X, Digits Y, const Parameters& params, ProcessorImpl* processor) { … } // Actual implementation of pointwise multiplications. void FFTContainer::DoPointwiseMultiplication(const FFTContainer& other, int start, int end, digit_t* temp) { … } // Convenient entry point for pointwise multiplications. void FFTContainer::PointwiseMultiply(const FFTContainer& other) { … } } // namespace //////////////////////////////////////////////////////////////////////////////// // Part 4: Tying everything together into a multiplication algorithm. // TODO(jkummerow): Consider doing a "Mersenne transform" and CRT reconstruction // of the final result. Might yield a few percent of perf improvement. // TODO(jkummerow): Consider implementing the "sqrt(2) trick". // Gaudry/Kruppa/Zimmerman report that it saved them around 10%. void ProcessorImpl::MultiplyFFT(RWDigits Z, Digits X, Digits Y) { … } } // namespace bigint } // namespace v8