/* * Copyright (c) 2020 - 2024 the ThorVG project. All rights reserved. * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ /* * Lempel–Ziv–Welch (LZW) encoder/decoder by Guilherme R. Lampert([email protected]) * This is the compression scheme used by the GIF image format and the Unix 'compress' tool. * Main differences from this implementation is that End Of Input (EOI) and Clear Codes (CC) * are not stored in the output and the max code length in bits is 12, vs 16 in compress. * * EOI is simply detected by the end of the data stream, while CC happens if the * dictionary gets filled. Data is written/read from bit streams, which handle * byte-alignment for us in a transparent way. * The decoder relies on the hardcoded data layout produced by the encoder, since * no additional reconstruction data is added to the output, so they must match. * The nice thing about LZW is that we can reconstruct the dictionary directly from * the stream of codes generated by the encoder, so this avoids storing additional * headers in the bit stream. * The output code length is variable. It starts with the minimum number of bits * required to store the base byte-sized dictionary and automatically increases * as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when * code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary * is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and * the process starts over. This is the main reason why the encoder and the decoder * must match perfectly, since the lengths of the codes will not be specified with * the data itself. * USEFUL LINKS: * https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch * http://rosettacode.org/wiki/LZW_compression * http://www.cs.duke.edu/csed/curious/compression/lzw.html * http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html * http://marknelson.us/1989/10/01/lzw-data-compression/ */ #include "config.h" #include <string> #include <memory.h> #include "tvgCompressor.h" namespace tvg { /************************************************************************/ /* LZW Implementation */ /************************************************************************/ //LZW Dictionary helper: constexpr int Nil = …; constexpr int MaxDictBits = …; constexpr int StartBits = …; constexpr int FirstCode = …; // 256 constexpr int MaxDictEntries = …; // 4096 //Round up to the next power-of-two number, e.g. 37 => 64 static int nextPowerOfTwo(int num) { … } struct BitStreamWriter { … }; struct BitStreamReader { … }; struct Dictionary { … }; static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar) { … } static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte) { … } uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes) { … } uint8_t* lzwEncode(const uint8_t* uncompressed, uint32_t uncompressedSizeBytes, uint32_t* compressedSizeBytes, uint32_t* compressedSizeBits) { … } /************************************************************************/ /* B64 Implementation */ /************************************************************************/ size_t b64Decode(const char* encoded, const size_t len, char** decoded) { … } /************************************************************************/ /* DJB2 Implementation */ /************************************************************************/ unsigned long djb2Encode(const char* str) { … } }