// Copyright 2008 Google Inc. All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Internals shared between the Snappy implementation and its unittest. #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ #include <utility> #include "snappy-stubs-internal.h" #if SNAPPY_HAVE_SSSE3 // Please do not replace with <x86intrin.h> or with headers that assume more // advanced SSE versions without checking with all the OWNERS. 
#include <emmintrin.h>
#include <tmmintrin.h>
#endif

#if SNAPPY_HAVE_NEON
#include <arm_neon.h>
#endif

#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE …
#else
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE …
#endif

namespace snappy {
namespace internal {

#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
// V128 is the platform's native 128-bit SIMD vector type.  The V128_* wrappers
// below give the SSSE3 and NEON code paths a single common interface.
#if SNAPPY_HAVE_SSSE3
using V128 = __m128i;
#elif SNAPPY_HAVE_NEON
using V128 = uint8x16_t;
#endif

// Load 128 bits of integer data. `src` must be 16-byte aligned.
inline V128 V128_Load(const V128* src);

// Load 128 bits of integer data. `src` does not need to be aligned.
inline V128 V128_LoadU(const V128* src);

// Store 128 bits of integer data. `dst` does not need to be aligned.
inline void V128_StoreU(V128* dst, V128 val);

// Shuffle packed 8-bit integers using a shuffle mask.
// Each packed integer in the shuffle mask must be in [0,16).
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);

// Constructs V128 with 16 chars |c|.
inline V128 V128_DupChar(char c);

#if SNAPPY_HAVE_SSSE3
// x86 SSSE3 implementations: thin wrappers over the matching intrinsics.
inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }

inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }

inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); }

inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
  return _mm_shuffle_epi8(input, shuffle_mask);
}

inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); }

#elif SNAPPY_HAVE_NEON
// ARM NEON implementations.  Note that both the aligned and the unaligned
// load are implemented with vld1q_u8, which has no alignment requirement.
inline V128 V128_Load(const V128* src) {
  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}

inline V128 V128_LoadU(const V128* src) {
  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}

inline void V128_StoreU(V128* dst, V128 val) {
  vst1q_u8(reinterpret_cast<uint8_t*>(dst), val);
}

inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
  // Debug-only enforcement of the documented [0,16) shuffle-index contract:
  // check the minimum and maximum byte of the mask before the table lookup.
  assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15);
  return vqtbl1q_u8(input, shuffle_mask);
}

inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); }

#endif
#endif //
SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE

// Working memory performs a single allocation to hold all scratch space
// required for compression.
class WorkingMemory { … };

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input_length <= kBlockSize"
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input_length)" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
char* CompressFragment(const char* input, size_t input_length, char* op,
                       uint16_t* table, const int table_size);

// Find the largest n such that
//
//   s1[0,n-1] == s2[0,n-1]
//   and n <= (s2_limit - s2).
//
// Return make_pair(n, n < 8).
// Does not read *s2_limit or beyond.
// Does not read *(s1 + (s2_limit - s2)) or beyond.
// Requires that s2_limit >= s2.
//
// In addition populate *data with the next 5 bytes from the end of the match.
// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is
// that on some arch's this can be done faster in this routine than subsequent
// loading from s2 + n.
//
// Separate implementation for 64-bit, little-endian cpus.
#if !SNAPPY_IS_BIG_ENDIAN && \
    (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
     defined(ARCH_ARM))
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                      const char* s2,
                                                      const char* s2_limit,
                                                      uint64_t* data) { … }
#else
// Portable fallback for the remaining targets (e.g. 32-bit or big-endian).
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                      const char* s2,
                                                      const char* s2_limit,
                                                      uint64_t* data) {
  // Implementation based on the x86-64 version, above.
  assert(s2_limit >= s2);
  int matched = 0;

  // Compare 32 bits at a time as long as a full 32-bit load fits before
  // s2_limit and the words still match.
  while (s2 <= s2_limit - 4 &&
         UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
    s2 += 4;
    matched += 4;
  }
  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
    // On a little-endian target the XOR of the two mismatching words is
    // nonzero, and its lowest set bit lies inside the first differing byte;
    // dividing the bit index by 8 (>> 3) yields the count of extra matching
    // bytes, avoiding a byte-by-byte tail loop.
    uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
    int matching_bits = Bits::FindLSBSetNonZero(x);
    matched += matching_bits >> 3;
    s2 += matching_bits >> 3;
  } else {
    // Byte-at-a-time tail compare; also the path taken on big-endian targets.
    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
      ++s2;
      ++matched;
    }
  }
  // Populate *data for the caller (see function comment above), but only when
  // the full 8-byte load stays within the readable range ending at s2_limit.
  if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2);
  return std::pair<size_t, bool>(matched, matched < 8);
}
#endif

// Lookup tables for decompression code. Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.

enum { … };
static const int kMaximumTagLength = …;  // COPY_4_BYTE_OFFSET plus the actual offset.

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed
// because of efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static constexpr uint16_t char_table[256] = …;

}  // end namespace internal
}  // end namespace snappy

#endif  // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_