// folly/folly/compression/Compression.h

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <vector>

#include <folly/Optional.h>
#include <folly/Range.h>
#include <folly/io/IOBuf.h>

/**
 * Compression / decompression over IOBufs
 */

namespace folly {
namespace compression {

/**
 * Identifies which codec is meant. Values of this type are passed to
 * getCodec(), getStreamCodec(), hasCodec() and hasStreamCodec().
 *
 * NOTE(review): no enumerators are declared in this view of the file, yet the
 * documentation of getAutoUncompressionCodec() refers to codec types by name
 * (LZ4_FRAME, ZSTD, ZLIB, GZIP, LZMA2, BZIP2, USER_DEFINED) -- confirm the
 * enumerator list against the full header.
 */
enum class CodecType {};

/**
 * Codec type handed out (by unique_ptr) from getCodec() and
 * getAutoUncompressionCodec(); also the public base class of StreamCodec.
 *
 * NOTE(review): the class body is empty in this view of the file; the
 * documentation of getAutoUncompressionCodec() mentions members such as
 * type(), compress(), uncompress(), canUncompress() and validPrefixes() --
 * confirm the interface against the full header.
 */
class Codec {};

/**
 * Codec type handed out (by unique_ptr) from getStreamCodec(). Publicly
 * derives from Codec, so a StreamCodec can be used wherever a Codec is
 * expected.
 *
 * NOTE(review): the class body is empty in this view of the file -- confirm
 * the streaming interface against the full header.
 */
class StreamCodec : public Codec {};

/**
 * Symbolic compression levels accepted by getCodec() and getStreamCodec() in
 * place of an explicit codec-dependent level. Explicit levels are
 * non-negative, so the symbolic levels are distinct negative sentinels that
 * can never collide with a real level.
 *
 * NOTE(review): the initializers were missing in this copy of the header
 * (`=;`); the values below are restored from upstream -- confirm.
 */
constexpr int COMPRESSION_LEVEL_FASTEST = -1;
constexpr int COMPRESSION_LEVEL_DEFAULT = -2;
constexpr int COMPRESSION_LEVEL_BEST = -3;

/**
 * Return a codec for the given type. Throws on error.  The level
 * is a non-negative codec-dependent integer indicating the level of
 * compression desired, or one of the following constants:
 *
 * COMPRESSION_LEVEL_FASTEST is fastest (uses least CPU / memory,
 *   worst compression)
 * COMPRESSION_LEVEL_DEFAULT is the default (likely a tradeoff between
 *   FASTEST and BEST)
 * COMPRESSION_LEVEL_BEST is the best compression (uses most CPU / memory,
 *   best compression)
 *
 * If level is omitted, COMPRESSION_LEVEL_DEFAULT is used.
 *
 * When decompressing, the compression level is ignored. All codecs will
 * decompress all data compressed with a codec of the same type, regardless
 * of compression level.
 */
std::unique_ptr<Codec> getCodec(
    CodecType type, int level = COMPRESSION_LEVEL_DEFAULT);

/**
 * Return a stream codec (StreamCodec) for the given type. Throws on error.
 * The level is a non-negative codec-dependent integer indicating the level of
 * compression desired, or one of the following constants:
 *
 * COMPRESSION_LEVEL_FASTEST is fastest (uses least CPU / memory,
 *   worst compression)
 * COMPRESSION_LEVEL_DEFAULT is the default (likely a tradeoff between
 *   FASTEST and BEST)
 * COMPRESSION_LEVEL_BEST is the best compression (uses most CPU / memory,
 *   best compression)
 *
 * If level is omitted, COMPRESSION_LEVEL_DEFAULT is used.
 *
 * When decompressing, the compression level is ignored. All codecs will
 * decompress all data compressed with a codec of the same type, regardless
 * of compression level.
 */
std::unique_ptr<StreamCodec> getStreamCodec(
    CodecType type, int level = COMPRESSION_LEVEL_DEFAULT);

/**
 * Returns a codec that can uncompress any of the given codec types as well as
 * {LZ4_FRAME, ZSTD, ZLIB, GZIP, LZMA2, BZIP2}. Appends each default codec to
 * customCodecs in order, so long as a codec with the same type() isn't already
 * present in customCodecs or as the terminalCodec. When uncompress() is called,
 * each codec's canUncompress() is called in the order that they are given.
 * Appended default codecs are checked last.  uncompress() is called on the
 * first codec whose canUncompress() returns true.
 *
 * In addition, an optional `terminalCodec` can be provided. This codec's
 * uncompress() will be called either when no other codec canUncompress() the
 * data or the chosen codec throws an exception on the data. The terminalCodec
 * is intended for ambiguous headers, when canUncompress() is false for some
 * data it can actually uncompress. The terminalCodec does not need to override
 * validPrefixes() or canUncompress() and overriding these functions will have
 * no effect on the returned codec's validPrefixes() or canUncompress()
 * functions. The terminalCodec's needsUncompressedLength() and
 * maxUncompressedLength() will affect the returned codec's respective
 * functions. The terminalCodec must not be duplicated in customCodecs.
 *
 * Both arguments are optional: by default customCodecs is empty and no
 * terminalCodec is provided (null).
 *
 * An exception is thrown if no codec canUncompress() the data and either no
 * terminal codec was provided or a terminal codec was provided and it throws on
 * the data.
 * An exception is thrown if the chosen codec's uncompress() throws on the data
 * and either no terminal codec was provided or a terminal codec was provided
 * and it also throws on the data.
 * An exception is thrown if compress() is called on the returned codec.
 *
 * Requirements are checked in debug mode and are as follows:
 * Let headers be the concatenation of every codec's validPrefixes().
 *  1. Each codec must override validPrefixes() and canUncompress().
 *  2. No codec's validPrefixes() may be empty.
 *  3. No header in headers may be empty.
 *  4. headers must not contain any duplicate elements.
 *  5. No strict non-empty prefix of any header in headers may be in headers.
 *  6. The terminalCodec's type must not be the same as any other codec's type
 *     (with USER_DEFINED being the exception).
 */
std::unique_ptr<Codec> getAutoUncompressionCodec(
    std::vector<std::unique_ptr<Codec>> customCodecs = {},
    std::unique_ptr<Codec> terminalCodec = {});

/**
 * Check if a specified codec is supported.
 *
 * Returns true if the codec identified by `type` is supported, false
 * otherwise.
 */
bool hasCodec(CodecType type);

/**
 * Check if a specified codec is supported and supports streaming.
 *
 * Returns true only if the codec identified by `type` is both supported and
 * supports streaming, false otherwise.
 */
bool hasStreamCodec(CodecType type);

/**
 * Feature-test macro. It is defined here so that users of folly can figure
 * out (e.g. with #ifdef) whether the header
 * folly/compression/CompressionContextPoolSingletons.h is present, and
 * therefore whether it can be included.
 *
 * The macro is defined with no value; only its presence is meaningful.
 */
#define FOLLY_COMPRESSION_HAS_CONTEXT_POOL_SINGLETONS
} // namespace compression
} // namespace folly

namespace folly::io {
Codec;
CodecType;
COMPRESSION_LEVEL_BEST;
COMPRESSION_LEVEL_DEFAULT;
COMPRESSION_LEVEL_FASTEST;
getAutoUncompressionCodec;
getCodec;
getStreamCodec;
hasCodec;
hasStreamCodec;
StreamCodec;
} // namespace folly::io