Crc32CombineDetail.cpp | Explore in Territory

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <array>
#include <utility>

#include <folly/Bits.h>
#include <folly/hash/detail/ChecksumDetail.h>

namespace folly {

// Standard galois-field multiply.  The only modification is that a,
// b, m, and p are all bit-reflected.
//
// https://en.wikipedia.org/wiki/Finite_field_arithmetic
static constexpr uint32_t gf_multiply_sw_1(
    size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) { … }
static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) { … }

static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) { … }

namespace {

template <size_t i, uint32_t m>
struct gf_powers_memo { … };
gf_powers_memo<0, m>;

template <uint32_t m>
struct gf_powers_make { … };

} // namespace

#if FOLLY_SSE_PREREQ(4, 2)

// Reduction taken from
// https://www.nicst.de/crc.pdf
//
// This is an intrinsics-based implementation of listing 3.
static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
  const auto crc2_xmm = _mm_set_epi64x(0, crc2);
  const auto count = _mm_set_epi64x(0, 1);
  const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
  const auto res1 = _mm_sll_epi64(res0, count);

  // Use hardware crc32c to do reduction from 64 -> 32 bytes
  const auto res2 = _mm_cvtsi128_si64(res1);
  const auto res3 = _mm_crc32_u32(0, res2);
  const auto res4 = _mm_extract_epi32(res1, 1);
  return res3 ^ res4;
}

static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
  const auto crc1_xmm = _mm_set_epi64x(0, crc1);
  const auto crc2_xmm = _mm_set_epi64x(0, crc2);
  const auto count = _mm_set_epi64x(0, 1);
  const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
  const auto res1 = _mm_sll_epi64(res0, count);

  // Do barrett reduction of 64 -> 32 bytes
  const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
  const auto barrett_reduction_constants =
      _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
  const auto res2 = _mm_clmulepi64_si128(
      _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
  const auto res3 = _mm_clmulepi64_si128(
      _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
  return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
}

#else

static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) { … }
static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) { … }

#endif

static constexpr uint32_t crc32c_m = …;
static constexpr uint32_t crc32_m = …;

/*
 * Pre-calculated powers tables for crc32c and crc32.
 */
static constexpr std::array<uint32_t, 62> const crc32c_powers = …;
static constexpr std::array<uint32_t, 62> const crc32_powers = …;

template <typename F>
static uint32_t crc32_append_zeroes(
    F mult,
    uint32_t crc,
    size_t len,
    uint32_t polynomial,
    std::array<uint32_t, 62> const& powers_array) { … }

namespace detail {

uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { … }

uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { … }

uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) { … }

uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) { … }

} // namespace detail

} // namespace folly
folly/folly/hash/detail/Crc32CombineDetail.cpp