// llvm/clang-tools-extra/clangd/index/Serialization.cpp

//===-- Serialization.cpp - Binary serialization of index data ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Serialization.h"
#include "Headers.h"
#include "RIFF.h"
#include "index/MemIndex.h"
#include "index/SymbolLocation.h"
#include "index/SymbolOrigin.h"
#include "index/dex/Dex.h"
#include "support/Logger.h"
#include "support/Trace.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <vector>

namespace clang {
namespace clangd {
namespace {

// IO PRIMITIVES
// We use little-endian 32-bit ints, sometimes with variable-length encoding.
//
// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
// to encode the number, and the top bit to indicate whether more bytes follow.
// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
// This represents 0x1a | 0x2f<<7 = 6042.
// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.

// Reads binary data from a StringRef, and keeps track of position.
//
// The read* helpers in this file take a `Reader &`, so consecutive reads share
// one cursor; readCompileCommand is the exception and takes its Reader by
// value.
// NOTE(review): the class body is empty in this view, so the cursor state, the
// read primitives and any error/EOF signalling are not visible here.
class Reader {};

// Output counterpart for fixed-width 32-bit integers: takes the value I and the
// destination stream OS.
// NOTE(review): the body is empty in this view. Per the IO PRIMITIVES note the
// encoding is presumably 4 little-endian bytes - confirm against the real
// implementation.
void write32(uint32_t I, llvm::raw_ostream &OS) {}

// Output counterpart for variable-length integers: takes the value I and the
// destination stream OS.
// NOTE(review): the body is empty in this view. Presumably emits the varint
// form described in the IO PRIMITIVES note (7 payload bits per byte, top bit
// set while more bytes follow, 1-5 bytes for a 32-bit value) - confirm.
void writeVar(uint32_t I, llvm::raw_ostream &OS) {}

// STRING TABLE ENCODING
// Index data has many string fields, and many strings are identical.
// We store each string once, and refer to them by index.
//
// The string table's format is:
//   - UncompressedSize : uint32 (or 0 for no compression)
//   - CompressedData   : byte[CompressedSize]
//
// CompressedData is a zlib-compressed byte[UncompressedSize].
// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
// These are sorted to improve compression.

// Maps each string to a canonical representation.
// Strings remain owned externally (e.g. by SymbolSlab).
//
// The write* helpers in this file receive it as `const StringTableOut &`.
// NOTE(review): the class body is empty in this view, so the interning,
// lookup and serialization members are not visible here.
class StringTableOut {};

// Result type of readStringTable (returned wrapped in llvm::Expected).
// NOTE(review): the members are not visible in this view. The read* helpers
// take the decoded strings as llvm::ArrayRef<llvm::StringRef>, which is
// presumably obtained from this struct - confirm.
struct StringTableIn {};

// Decodes a string table from Data (layout: see STRING TABLE ENCODING above).
// Returns llvm::Expected<StringTableIn>, i.e. either the decoded table or an
// llvm::Error.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown; the failure conditions (e.g. truncated or undecompressible data) are
// not visible here.
llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) {}

// SYMBOL ENCODING
// Each field of clangd::Symbol is encoded in turn (see implementation).
//  - StringRef fields encode as varint (index into the string table)
//  - enums encode as the underlying type
//  - most numbers encode as varint

// Serializer for a SymbolLocation: takes the location Loc, the string table
// Strings and the destination stream OS.
// NOTE(review): the body is empty in this view. Per the SYMBOL ENCODING note,
// string fields are presumably written as varint indices into Strings -
// confirm the exact field order against readLocation.
void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings,
                   llvm::raw_ostream &OS) {}

// Deserializer for a SymbolLocation; the read-side counterpart of
// writeLocation. Data is taken by reference, and Strings is the decoded string
// table that string indices refer to.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown; how out-of-range string indices are handled is not visible here.
SymbolLocation readLocation(Reader &Data,
                            llvm::ArrayRef<llvm::StringRef> Strings) {}

// Deserializer for one IncludeGraphNode; the read-side counterpart of
// writeIncludeGraphNode. Data is taken by reference, and Strings is the decoded
// string table.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown and the encoded field order is not visible here.
IncludeGraphNode readIncludeGraphNode(Reader &Data,
                                      llvm::ArrayRef<llvm::StringRef> Strings) {}

// Serializer for one IncludeGraphNode: takes the node IGN, the string table
// Strings and the destination stream OS.
// NOTE(review): the body is empty in this view; the field order must match
// readIncludeGraphNode - confirm against the real implementation.
void writeIncludeGraphNode(const IncludeGraphNode &IGN,
                           const StringTableOut &Strings,
                           llvm::raw_ostream &OS) {}

// Serializer for one Symbol: takes the symbol Sym, the string table Strings and
// the destination stream OS.
// Per the SYMBOL ENCODING note, each field of clangd::Symbol is encoded in
// turn: StringRef fields as varint string-table indices, enums as their
// underlying type, most numbers as varint.
// NOTE(review): the body is empty in this view, so the actual field order is
// not visible here; it must match readSymbol.
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
                 llvm::raw_ostream &OS) {}

// Deserializer for one Symbol; the read-side counterpart of writeSymbol.
// Data is taken by reference, and Strings is the decoded string table.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown. Origin is presumably stamped onto the returned symbol rather than read
// from the stream - confirm.
Symbol readSymbol(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings,
                  SymbolOrigin Origin) {}

// REFS ENCODING
// A refs section has data grouped by Symbol. Each symbol has:
//  - SymbolID: 8 bytes
//  - NumRefs: varint
//  - Ref[NumRefs]
// Fields of Ref are encoded in turn, see implementation.

// Serializer for the references of one symbol: takes the symbol ID, its Refs,
// the string table Strings and the destination stream OS.
// Per the REFS ENCODING note, a group is: SymbolID (8 bytes), NumRefs (varint),
// then NumRefs encoded Ref entries.
// NOTE(review): the body is empty in this view, so the per-Ref field order is
// not visible here; it must match readRefs.
void writeRefs(const SymbolID &ID, llvm::ArrayRef<Ref> Refs,
               const StringTableOut &Strings, llvm::raw_ostream &OS) {}

// Deserializer for one refs group (see REFS ENCODING above); the read-side
// counterpart of writeRefs. The return type pairs a SymbolID with the vector of
// Refs that belong to it.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown; how an implausibly large NumRefs is handled is not visible here.
std::pair<SymbolID, std::vector<Ref>>
readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {}

// RELATIONS ENCODING
// A relations section is a flat list of relations. Each relation has:
//  - SymbolID (subject): 8 bytes
//  - relation kind (predicate): 1 byte
//  - SymbolID (object): 8 bytes
// In the future, we might prefer a packed representation if the need arises.

// Serializer for one Relation: takes the relation R and the destination
// stream OS. No string table is passed.
// Per the RELATIONS ENCODING note, a relation is: subject SymbolID (8 bytes),
// relation kind (1 byte), object SymbolID (8 bytes).
// NOTE(review): the body is empty in this view - confirm the emitted order
// matches that note and readRelation.
void writeRelation(const Relation &R, llvm::raw_ostream &OS) {}

// Deserializer for one Relation (see RELATIONS ENCODING above); the read-side
// counterpart of writeRelation. Data is taken by reference.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown.
Relation readRelation(Reader &Data) {}

// Value type exchanged by writeCompileCommand and readCompileCommand.
// NOTE(review): the fields are not visible in this view. The name suggests a
// compile command whose strings are interned in (and therefore borrowed from)
// the string table - confirm the lifetime requirements.
struct InternedCompileCommand {};

// Serializer for a compile command: takes the command Cmd, the string table
// Strings and the destination stream CmdOS.
// NOTE(review): the body is empty in this view, so the encoded layout is not
// visible here; it must match readCompileCommand.
void writeCompileCommand(const InternedCompileCommand &Cmd,
                         const StringTableOut &Strings,
                         llvm::raw_ostream &CmdOS) {}

// Deserializer for a compile command; the read-side counterpart of
// writeCompileCommand. Strings is the decoded string table.
// Unlike the other read* helpers, CmdReader is taken by value, so this function
// works on its own copy of the reader.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown.
InternedCompileCommand
readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) {}

// FILE ENCODING
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
//   - meta: version number
//   - srcs: information related to include graph
//   - stri: string table
//   - symb: symbols
//   - refs: references to symbols

// The current versioning scheme is simple - non-current versions are rejected.
// If you make a breaking change, bump this version number to invalidate stored
// data. Later we may want to support some backward compatibility.
// NOTE(review): the initializer is missing in this view (`Version =;` does not
// compile). Restore the real value from the authoritative source; do not guess,
// since a wrong number would reject or mis-accept existing index files.
constexpr static uint32_t Version =;

// Reader for the binary (RIFF) index format described under FILE ENCODING.
// Takes the raw file contents Data and an Origin value. Returns
// llvm::Expected<IndexFileIn>, i.e. either the decoded index data or an
// llvm::Error.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown. Presumably rejects files whose 'meta' version differs from Version and
// forwards Origin to readSymbol - confirm.
llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
                                     SymbolOrigin Origin) {}

// Generic visitor over an IncludeGraphNode. IGN is taken by non-const
// reference and CB by const reference.
// NOTE(review): the body is empty in this view. Presumably invokes CB on each
// string field of IGN, with mutable access so the callback can replace the
// string (e.g. with its interned copy) - confirm.
template <class Callback>
void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {}

// Writer for the binary (RIFF) index format described under FILE ENCODING:
// takes the index data Data and the destination stream OS.
// NOTE(review): the body is empty in this view, so the set and order of chunks
// actually emitted is not visible here.
void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {}

} // namespace

// Defined in YAMLSerialization.cpp.
// YAML writer/reader with the same parameter and return types as
// writeRIFF/readRIFF. Declared outside the anonymous namespace because the
// definitions live in another translation unit.
// NOTE(review): presumably called from operator<< and readIndexFile below to
// handle the YAML format - confirm.
void writeYAML(const IndexFileOut &, llvm::raw_ostream &);
llvm::Expected<IndexFileIn> readYAML(llvm::StringRef, SymbolOrigin Origin);

// Stream-insertion operator for IndexFileOut; the return type allows chaining
// on the stream.
// NOTE(review): the body is empty in this view, so nothing is returned as
// shown. Presumably selects writeRIFF or writeYAML based on a format field of
// O and then returns OS - confirm.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {}

// Public entry point for parsing serialized index data held in memory.
// Takes the raw contents Data and an Origin value. Returns
// llvm::Expected<IndexFileIn>, i.e. either the decoded index data or an
// llvm::Error.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown. Presumably sniffs the format of Data and dispatches to readRIFF or
// readYAML - confirm.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data,
                                          SymbolOrigin Origin) {}

// Public entry point for loading an index from a file. Takes the file name
// SymbolFilename, an Origin value and the UseDex flag; the return type is an
// owning pointer to a SymbolIndex.
// NOTE(review): the body is empty in this view, so no value is returned as
// shown. Given the MemIndex.h and Dex.h includes, UseDex presumably selects a
// Dex-backed index over MemIndex; whether failure yields nullptr is not
// visible here - confirm.
std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
                                       SymbolOrigin Origin, bool UseDex) {}

} // namespace clangd
} // namespace clang