llvm/llvm/include/llvm/Support/JSON.h

//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
///
/// \file
/// This file supports working with JSON data.
///
/// It comprises:
///
/// - classes which hold dynamically-typed parsed JSON structures
///   These are value types that can be composed, inspected, and modified.
///   See json::Value, and the related types json::Object and json::Array.
///
/// - functions to parse JSON text into Values, and to serialize Values to text.
///   See parse(), operator<<, and format_provider.
///
/// - a convention and helpers for mapping between json::Value and user-defined
///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
///
/// - an output API json::OStream which can emit JSON without materializing
///   all structures as json::Value.
///
/// Typically, JSON data would be read from an external source, parsed into
/// a Value, and then converted into some native data structure before doing
/// real work on it. (And vice versa when writing).
///
/// Other serialization mechanisms you may consider:
///
/// - YAML is also text-based, and more human-readable than JSON. It's a more
///   complex format and data model, and YAML parsers aren't ubiquitous.
///   YAMLParser.h is a streaming parser suitable for parsing large documents
///   (including JSON, as YAML is a superset). It can be awkward to use
///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
///   declarative than the toJSON/fromJSON conventions here.
///
/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
///
//===---------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JSON_H
#define LLVM_SUPPORT_JSON_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <map>

namespace llvm {
namespace json {

// === String encodings ===
//
// JSON strings are character sequences (not byte sequences like std::string).
// We need to know the encoding, and for simplicity only support UTF-8.
//
//   - When parsing, invalid UTF-8 is a syntax error like any other
//
//   - When creating Values from strings, callers must ensure they are UTF-8.
//        with asserts on, invalid UTF-8 will crash the program
//        with asserts off, we'll substitute the replacement character (U+FFFD)
//     Callers can use json::isUTF8() and json::fixUTF8() for validation.
//
//   - When retrieving strings from Values (e.g. asString()), the result will
//     always be valid UTF-8.

is_uint_64_bit_v;

/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p Offset is set to a byte offset near the first error.
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
/// Replaces invalid UTF-8 sequences in \p S with the replacement character
/// (U+FFFD). The returned string is valid UTF-8.
/// This is much slower than isUTF8, so test that first.
std::string fixUTF8(llvm::StringRef S);

class Array;
class ObjectKey;
class Value;
template <typename T> Value toJSON(const std::optional<T> &Opt);

/// An Object is a JSON object, which maps strings to heterogenous JSON values.
/// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
class Object {};
bool operator==(const Object &LHS, const Object &RHS);
inline bool operator!=(const Object &LHS, const Object &RHS) {}

/// An Array is a JSON array, which contains heterogeneous JSON values.
/// It simulates std::vector<Value>.
class Array {};
inline bool operator!=(const Array &L, const Array &R) {}

/// A Value is an JSON value of unknown type.
/// They can be copied, but should generally be moved.
///
/// === Composing values ===
///
/// You can implicitly construct Values from:
///   - strings: std::string, SmallString, formatv, StringRef, char*
///              (char*, and StringRef are references, not copies!)
///   - numbers
///   - booleans
///   - null: nullptr
///   - arrays: {"foo", 42.0, false}
///   - serializable things: types with toJSON(const T&)->Value, found by ADL
///
/// They can also be constructed from object/array helpers:
///   - json::Object is a type like map<ObjectKey, Value>
///   - json::Array is a type like vector<Value>
/// These can be list-initialized, or used to build up collections in a loop.
/// json::ary(Collection) converts all items in a collection to Values.
///
/// === Inspecting values ===
///
/// Each Value is one of the JSON kinds:
///   null    (nullptr_t)
///   boolean (bool)
///   number  (double, int64 or uint64)
///   string  (StringRef)
///   array   (json::Array)
///   object  (json::Object)
///
/// The kind can be queried directly, or implicitly via the typed accessors:
///   if (std::optional<StringRef> S = E.getAsString()
///     assert(E.kind() == Value::String);
///
/// Array and Object also have typed indexing accessors for easy traversal:
///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
///   if (Object* O = E->getAsObject())
///     if (Object* Opts = O->getObject("options"))
///       if (std::optional<StringRef> Font = Opts->getString("font"))
///         assert(Opts->at("font").kind() == Value::String);
///
/// === Converting JSON values to C++ types ===
///
/// The convention is to have a deserializer function findable via ADL:
///     fromJSON(const json::Value&, T&, Path) -> bool
///
/// The return value indicates overall success, and Path is used for precise
/// error reporting. (The Path::Root passed in at the top level fromJSON call
/// captures any nested error and can render it in context).
/// If conversion fails, fromJSON calls Path::report() and immediately returns.
/// This ensures that the first fatal error survives.
///
/// Deserializers are provided for:
///   - bool
///   - int and int64_t
///   - double
///   - std::string
///   - vector<T>, where T is deserializable
///   - map<string, T>, where T is deserializable
///   - std::optional<T>, where T is deserializable
/// ObjectMapper can help writing fromJSON() functions for object types.
///
/// For conversion in the other direction, the serializer function is:
///    toJSON(const T&) -> json::Value
/// If this exists, then it also allows constructing Value from T, and can
/// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
///
/// === Serialization ===
///
/// Values can be serialized to JSON:
///   1) raw_ostream << Value                    // Basic formatting.
///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
///
/// And parsed:
///   Expected<Value> E = json::parse("[1, 2, null]");
///   assert(E && E->kind() == Value::Array);
class Value {};

bool operator==(const Value &, const Value &);
inline bool operator!=(const Value &L, const Value &R) {}

// Array Methods
inline Value &Array::operator[](size_t I) {}
inline const Value &Array::operator[](size_t I) const {}
inline Value &Array::front() {}
inline const Value &Array::front() const {}
inline Value &Array::back() {}
inline const Value &Array::back() const {}
inline Value *Array::data() {}
inline const Value *Array::data() const {}

inline typename Array::iterator Array::begin() {}
inline typename Array::const_iterator Array::begin() const {}
inline typename Array::iterator Array::end() {}
inline typename Array::const_iterator Array::end() const {}

inline bool Array::empty() const {}
inline size_t Array::size() const {}
inline void Array::reserve(size_t S) {}

inline void Array::clear() {}
inline void Array::push_back(const Value &E) {}
inline void Array::push_back(Value &&E) {}
template <typename... Args> inline void Array::emplace_back(Args &&...A) {}
inline void Array::pop_back() {}
inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {}
inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {}
template <typename It>
inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {}
template <typename... Args>
inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {}
inline bool operator==(const Array &L, const Array &R) {}

/// ObjectKey is a used to capture keys in Object. Like Value but:
///   - only strings are allowed
///   - it's optimized for the string literal case (Owned == nullptr)
/// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
class ObjectKey {};

inline bool operator==(const ObjectKey &L, const ObjectKey &R) {}
inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {}
inline bool operator<(const ObjectKey &L, const ObjectKey &R) {}

struct Object::KV {};

inline Object::Object(std::initializer_list<KV> Properties) {}
inline std::pair<Object::iterator, bool> Object::insert(KV E) {}
inline bool Object::erase(StringRef K) {}

std::vector<const Object::value_type *> sortedElements(const Object &O);

/// A "cursor" marking a position within a Value.
/// The Value is a tree, and this is the path from the root to the current node.
/// This is used to associate errors with particular subobjects.
class Path {};

/// The root is the trivial Path to the root value.
/// It also stores the latest reported error and the path where it occurred.
class Path::Root {};

// Standard deserializers are provided for primitive types.
// See comments on Value.
inline bool fromJSON(const Value &E, std::string &Out, Path P) {}
inline bool fromJSON(const Value &E, int &Out, Path P) {}
inline bool fromJSON(const Value &E, int64_t &Out, Path P) {}
inline bool fromJSON(const Value &E, double &Out, Path P) {}
inline bool fromJSON(const Value &E, bool &Out, Path P) {}
inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {}
inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {}
template <typename T>
bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {}
template <typename T>
bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {}
template <typename T>
bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {}

// Allow serialization of std::optional<T> for supported T.
template <typename T> Value toJSON(const std::optional<T> &Opt) {}

/// Helper for mapping JSON objects onto protocol structs.
///
/// Example:
/// \code
///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
///     ObjectMapper O(E, P);
///     // When returning false, error details were already reported.
///     return O && O.map("mandatory_field", R.MandatoryField) &&
///         O.mapOptional("optional_field", R.OptionalField);
///   }
/// \endcode
class ObjectMapper {};

/// Parses the provided JSON source, or returns a ParseError.
/// The returned Value is self-contained and owns its strings (they do not refer
/// to the original source).
llvm::Expected<Value> parse(llvm::StringRef JSON);

class ParseError : public llvm::ErrorInfo<ParseError> {};

/// Version of parse() that converts the parsed value to the type T.
/// RootName describes the root object and is used in error messages.
template <typename T>
Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {}

/// json::OStream allows writing well-formed JSON without materializing
/// all structures as json::Value ahead of time.
/// It's faster, lower-level, and less safe than OS << json::Value.
/// It also allows emitting more constructs, such as comments.
///
/// Only one "top-level" object can be written to a stream.
/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
///
///   json::OStream J(OS);
///   J.array([&]{
///     for (const Event &E : Events)
///       J.object([&] {
///         J.attribute("timestamp", int64_t(E.Time));
///         J.attributeArray("participants", [&] {
///           for (const Participant &P : E.Participants)
///             J.value(P.toString());
///         });
///       });
///   });
///
/// This would produce JSON like:
///
///   [
///     {
///       "timestamp": 19287398741,
///       "participants": [
///         "King Kong",
///         "Miley Cyrus",
///         "Cleopatra"
///       ]
///     },
///     ...
///   ]
///
/// The lower level begin/end methods (arrayBegin()) are more flexible but
/// care must be taken to pair them correctly:
///
///   json::OStream J(OS);
//    J.arrayBegin();
///   for (const Event &E : Events) {
///     J.objectBegin();
///     J.attribute("timestamp", int64_t(E.Time));
///     J.attributeBegin("participants");
///     for (const Participant &P : E.Participants)
///       J.value(P.toString());
///     J.attributeEnd();
///     J.objectEnd();
///   }
///   J.arrayEnd();
///
/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
/// an array, and so on.
/// With asserts disabled, this is undefined behavior.
class OStream {};

/// Serializes this Value to JSON, writing it to the provided stream.
/// The formatting is compact (no extra whitespace) and deterministic.
/// For pretty-printing, use the formatv() format_provider below.
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {}
} // namespace json

/// Allow printing json::Value with formatv().
/// The default style is basic/compact formatting, like operator<<.
/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
template <> struct format_provider<llvm::json::Value> {};
} // namespace llvm

#endif