/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * RecordIO: self-synchronizing stream of variable length records
 *
 * RecordIO gives you the ability to write a stream of variable length records
 * and read them later even in the face of data corruption -- randomly inserted
 * or deleted chunks of the file, or modified data. When reading, you may lose
 * corrupted records, but the stream will resynchronize automatically.
 */

#pragma once

// NOTE(review): presumably this marker lets RecordIO-inl.h (included at the
// bottom of this header) verify that it is only pulled in through this file
// -- confirm against RecordIO-inl.h.
#define FOLLY_IO_RECORDIO_H_

#include <atomic>
#include <memory>
#include <mutex>

#include <folly/File.h>
#include <folly/Range.h>
#include <folly/io/IOBuf.h>
#include <folly/system/MemoryMapping.h>

namespace folly {

/**
 * Class to write a stream of RecordIO records to a file.
 *
 * RecordIOWriter is thread-safe
 */
class RecordIOWriter { … };

/**
 * Class to read from a RecordIO file. Will skip invalid records.
 */
class RecordIOReader { … };

namespace recordio_helpers {

// We're exposing the guts of the RecordIO implementation for two reasons:
// 1. It makes unit testing easier, and
// 2. It allows you to build different RecordIO readers / writers that use
//    different storage systems underneath (not standard files)

/**
 * Header size.
 */
constexpr size_t headerSize(); // defined in RecordIO-inl.h

/**
 * Write a header in the buffer. We will prepend the header to the front
 * of the chain. Do not write the buffer if empty (we don't allow empty
 * records).
 *
 * Returns the total length, including header (0 if empty)
 * (same as buf->computeChainDataLength(), but likely faster)
 *
 * The fileId should be unique per stream and allows you to have RecordIO
 * headers stored inside the data (for example, have an entire RecordIO
 * file stored as a record inside another RecordIO file). The fileId may
 * not be 0; it defaults to 1 when omitted.
 */
size_t prependHeader(std::unique_ptr<IOBuf>& buf, uint32_t fileId = 1);

/**
 * Result type returned by findRecord(), validateRecordData() and
 * validateRecord() below.
 *
 * NOTE(review): the comments on those functions describe their results as
 * "the record data" or "ByteRange()"; presumably that refers to a ByteRange
 * carried inside RecordInfo -- confirm against the struct definition.
 */
struct RecordInfo { … };

/**
 * Search for the first valid record that begins in searchRange (which must be
 * a subrange of wholeRange). Returns the record data (not the header) if
 * found, ByteRange() otherwise.
 *
 * The fileId may be 0, in which case we'll return the first valid record for
 * *any* fileId, or non-zero, in which case we'll only look for records with
 * the requested fileId.
 */
RecordInfo findRecord(
    ByteRange searchRange, ByteRange wholeRange, uint32_t fileId);

/**
 * Search for the first valid record in range.
 */
RecordInfo findRecord(ByteRange range, uint32_t fileId);

/**
 * Check if the record header at the beginning of range is valid.
 *
 * Useful for checking the validity of the header before building the entire
 * record in an IOBuf. If the record comes from a storage device (e.g. flash),
 * it is better to make sure that the header is valid before reading the data
 * from the storage device.
 *
 * Returns true if valid, false otherwise.
 */
bool validateRecordHeader(ByteRange range, uint32_t fileId);

/**
 * Check if the record data is valid (to be used after validating the header
 * separately).
 *
 * Returns the record data (not the header) if the record data is valid,
 * ByteRange() otherwise.
 */
RecordInfo validateRecordData(ByteRange range);

/**
 * Check if there is a valid record at the beginning of range. This validates
 * both the record header and the record data.
 *
 * Returns the record data (not the header) if the record is valid,
 * ByteRange() otherwise.
 */
RecordInfo validateRecord(ByteRange range, uint32_t fileId);

} // namespace recordio_helpers

} // namespace folly

#include <folly/io/RecordIO-inl.h>