tvgCompressor.cpp | Explore in Territory

/*
 * Copyright (c) 2020 - 2024 the ThorVG project. All rights reserved.

 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:

 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * Lempel–Ziv–Welch (LZW) encoder/decoder by Guilherme R. Lampert([email protected])

 * This is the compression scheme used by the GIF image format and the Unix 'compress' tool.
 * Main differences from this implementation is that End Of Input (EOI) and Clear Codes (CC)
 * are not stored in the output and the max code length in bits is 12, vs 16 in compress.
 *
 * EOI is simply detected by the end of the data stream, while CC happens if the
 * dictionary gets filled. Data is written/read from bit streams, which handle
 * byte-alignment for us in a transparent way.

 * The decoder relies on the hardcoded data layout produced by the encoder, since
 * no additional reconstruction data is added to the output, so they must match.
 * The nice thing about LZW is that we can reconstruct the dictionary directly from
 * the stream of codes generated by the encoder, so this avoids storing additional
 * headers in the bit stream.

 * The output code length is variable. It starts with the minimum number of bits
 * required to store the base byte-sized dictionary and automatically increases
 * as the dictionary gets larger (it starts at 9-bits and grows to 10-bits when
 * code 512 is added, then 11-bits when 1024 is added, and so on). If the dictionary
 * is filled (4096 items for a 12-bits dictionary), the whole thing is cleared and
 * the process starts over. This is the main reason why the encoder and the decoder
 * must match perfectly, since the lengths of the codes will not be specified with
 * the data itself.

 * USEFUL LINKS:
 * https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
 * http://rosettacode.org/wiki/LZW_compression
 * http://www.cs.duke.edu/csed/curious/compression/lzw.html
 * http://www.cs.cf.ac.uk/Dave/Multimedia/node214.html
 * http://marknelson.us/1989/10/01/lzw-data-compression/
 */
#include "config.h"



#include <string>
#include <memory.h>
#include "tvgCompressor.h"

namespace tvg {


/************************************************************************/
/* LZW Implementation                                                   */
/************************************************************************/


//LZW Dictionary helper:
constexpr int Nil = …;
constexpr int MaxDictBits = …;
constexpr int StartBits = …;
constexpr int FirstCode = …; // 256
constexpr int MaxDictEntries = …;     // 4096


//Round up to the next power-of-two number, e.g. 37 => 64
static int nextPowerOfTwo(int num)
{ … }


struct BitStreamWriter
{ … };


struct BitStreamReader
{ … };


struct Dictionary
{ … };


static bool outputByte(int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar)
{ … }


static bool outputSequence(const Dictionary& dict, int code, uint8_t*& output, int outputSizeBytes, int& bytesDecodedSoFar, int& firstByte)
{ … }


uint8_t* lzwDecode(const uint8_t* compressed, uint32_t compressedSizeBytes, uint32_t compressedSizeBits, uint32_t uncompressedSizeBytes)
{ … }


uint8_t* lzwEncode(const uint8_t* uncompressed, uint32_t uncompressedSizeBytes, uint32_t* compressedSizeBytes, uint32_t* compressedSizeBits)
{ … }


/************************************************************************/
/* B64 Implementation                                                   */
/************************************************************************/


size_t b64Decode(const char* encoded, const size_t len, char** decoded)
{ … }


/************************************************************************/
/* DJB2 Implementation                                                   */
/************************************************************************/

unsigned long djb2Encode(const char* str)
{ … }

}
godot/thirdparty/thorvg/src/common/tvgCompressor.cpp