//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// /// /// \file /// This file supports working with JSON data. /// /// It comprises: /// /// - classes which hold dynamically-typed parsed JSON structures /// These are value types that can be composed, inspected, and modified. /// See json::Value, and the related types json::Object and json::Array. /// /// - functions to parse JSON text into Values, and to serialize Values to text. /// See parse(), operator<<, and format_provider. /// /// - a convention and helpers for mapping between json::Value and user-defined /// types. See fromJSON(), ObjectMapper, and the class comment on Value. /// /// - an output API json::OStream which can emit JSON without materializing /// all structures as json::Value. /// /// Typically, JSON data would be read from an external source, parsed into /// a Value, and then converted into some native data structure before doing /// real work on it. (And vice versa when writing). /// /// Other serialization mechanisms you may consider: /// /// - YAML is also text-based, and more human-readable than JSON. It's a more /// complex format and data model, and YAML parsers aren't ubiquitous. /// YAMLParser.h is a streaming parser suitable for parsing large documents /// (including JSON, as YAML is a superset). It can be awkward to use /// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more /// declarative than the toJSON/fromJSON conventions here. /// /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it /// encodes LLVM IR ("bitcode"), but it can be a container for other data. 
/// Low-level reader/writer libraries are in Bitstream/Bitstream*.h /// //===---------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_JSON_H #define LLVM_SUPPORT_JSON_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include <cmath> #include <map> namespace llvm { namespace json { // === String encodings === // // JSON strings are character sequences (not byte sequences like std::string). // We need to know the encoding, and for simplicity only support UTF-8. // // - When parsing, invalid UTF-8 is a syntax error like any other // // - When creating Values from strings, callers must ensure they are UTF-8. // with asserts on, invalid UTF-8 will crash the program // with asserts off, we'll substitute the replacement character (U+FFFD) // Callers can use json::isUTF8() and json::fixUTF8() for validation. // // - When retrieving strings from Values (e.g. asString()), the result will // always be valid UTF-8. is_uint_64_bit_v; /// Returns true if \p S is valid UTF-8, which is required for use as JSON. /// If it returns false, \p ErrOffset is set to a byte offset near the first error. bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr); /// Replaces invalid UTF-8 sequences in \p S with the replacement character /// (U+FFFD). The returned string is valid UTF-8. /// This is much slower than isUTF8, so test that first. std::string fixUTF8(llvm::StringRef S); class Array; class ObjectKey; class Value; template <typename T> Value toJSON(const std::optional<T> &Opt); /// An Object is a JSON object, which maps strings to heterogeneous JSON values. /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string. 
class Object { … }; bool operator==(const Object &LHS, const Object &RHS); inline bool operator!=(const Object &LHS, const Object &RHS) { … } /// An Array is a JSON array, which contains heterogeneous JSON values. /// It simulates std::vector<Value>. class Array { … }; inline bool operator!=(const Array &L, const Array &R) { … } /// A Value is a JSON value of unknown type. /// They can be copied, but should generally be moved. /// /// === Composing values === /// /// You can implicitly construct Values from: /// - strings: std::string, SmallString, formatv, StringRef, char* /// (char* and StringRef are references, not copies!) /// - numbers /// - booleans /// - null: nullptr /// - arrays: {"foo", 42.0, false} /// - serializable things: types with toJSON(const T&)->Value, found by ADL /// /// They can also be constructed from object/array helpers: /// - json::Object is a type like map<ObjectKey, Value> /// - json::Array is a type like vector<Value> /// These can be list-initialized, or used to build up collections in a loop. /// json::ary(Collection) converts all items in a collection to Values. 
/// /// === Inspecting values === /// /// Each Value is one of the JSON kinds: /// null (nullptr_t) /// boolean (bool) /// number (double, int64 or uint64) /// string (StringRef) /// array (json::Array) /// object (json::Object) /// /// The kind can be queried directly, or implicitly via the typed accessors: /// if (std::optional<StringRef> S = E.getAsString()) /// assert(E.kind() == Value::String); /// /// Array and Object also have typed indexing accessors for easy traversal: /// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )"); /// if (Object* O = E->getAsObject()) /// if (Object* Opts = O->getObject("options")) /// if (std::optional<StringRef> Font = Opts->getString("font")) /// assert(Opts->at("font").kind() == Value::String); /// /// === Converting JSON values to C++ types === /// /// The convention is to have a deserializer function findable via ADL: /// fromJSON(const json::Value&, T&, Path) -> bool /// /// The return value indicates overall success, and Path is used for precise /// error reporting. (The Path::Root passed in at the top-level fromJSON call /// captures any nested error and can render it in context). /// If conversion fails, fromJSON calls Path::report() and immediately returns. /// This ensures that the first fatal error survives. /// /// Deserializers are provided for: /// - bool /// - int, int64_t and uint64_t /// - double /// - std::string /// - std::nullptr_t /// - vector<T>, where T is deserializable /// - map<string, T>, where T is deserializable /// - std::optional<T>, where T is deserializable /// ObjectMapper can help writing fromJSON() functions for object types. /// /// For conversion in the other direction, the serializer function is: /// toJSON(const T&) -> json::Value /// If this exists, then it also allows constructing Value from T, and can /// be used to serialize vector<T>, map<string, T>, and std::optional<T>. /// /// === Serialization === /// /// Values can be serialized to JSON: /// 1) raw_ostream << Value // Basic formatting. 
/// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. /// /// And parsed: /// Expected<Value> E = json::parse("[1, 2, null]"); /// assert(E && E->kind() == Value::Array); class Value { … }; bool operator==(const Value &, const Value &); inline bool operator!=(const Value &L, const Value &R) { … } // Array Methods inline Value &Array::operator[](size_t I) { … } inline const Value &Array::operator[](size_t I) const { … } inline Value &Array::front() { … } inline const Value &Array::front() const { … } inline Value &Array::back() { … } inline const Value &Array::back() const { … } inline Value *Array::data() { … } inline const Value *Array::data() const { … } inline typename Array::iterator Array::begin() { … } inline typename Array::const_iterator Array::begin() const { … } inline typename Array::iterator Array::end() { … } inline typename Array::const_iterator Array::end() const { … } inline bool Array::empty() const { … } inline size_t Array::size() const { … } inline void Array::reserve(size_t S) { … } inline void Array::clear() { … } inline void Array::push_back(const Value &E) { … } inline void Array::push_back(Value &&E) { … } template <typename... Args> inline void Array::emplace_back(Args &&...A) { … } inline void Array::pop_back() { … } inline typename Array::iterator Array::insert(const_iterator P, const Value &E) { … } inline typename Array::iterator Array::insert(const_iterator P, Value &&E) { … } template <typename It> inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) { … } template <typename... Args> inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { … } inline bool operator==(const Array &L, const Array &R) { … } /// ObjectKey is used to capture keys in Object. 
Like Value but: /// - only strings are allowed /// - it's optimized for the string literal case (Owned == nullptr) /// Like Value, strings must be UTF-8. See isUTF8 documentation for details. class ObjectKey { … }; inline bool operator==(const ObjectKey &L, const ObjectKey &R) { … } inline bool operator!=(const ObjectKey &L, const ObjectKey &R) { … } inline bool operator<(const ObjectKey &L, const ObjectKey &R) { … } struct Object::KV { … }; inline Object::Object(std::initializer_list<KV> Properties) { … } inline std::pair<Object::iterator, bool> Object::insert(KV E) { … } inline bool Object::erase(StringRef K) { … } std::vector<const Object::value_type *> sortedElements(const Object &O); /// A "cursor" marking a position within a Value. /// The Value is a tree, and this is the path from the root to the current node. /// This is used to associate errors with particular subobjects. class Path { … }; /// The root is the trivial Path to the root value. /// It also stores the latest reported error and the path where it occurred. class Path::Root { … }; // Standard deserializers are provided for primitive types. // See comments on Value. inline bool fromJSON(const Value &E, std::string &Out, Path P) { … } inline bool fromJSON(const Value &E, int &Out, Path P) { … } inline bool fromJSON(const Value &E, int64_t &Out, Path P) { … } inline bool fromJSON(const Value &E, double &Out, Path P) { … } inline bool fromJSON(const Value &E, bool &Out, Path P) { … } inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { … } inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { … } template <typename T> bool fromJSON(const Value &E, std::optional<T> &Out, Path P) { … } template <typename T> bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { … } template <typename T> bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { … } // Allow serialization of std::optional<T> for supported T. 
template <typename T> Value toJSON(const std::optional<T> &Opt) { … } /// Helper for mapping JSON objects onto protocol structs. /// /// Example: /// \code /// bool fromJSON(const Value &E, MyStruct &R, Path P) { /// ObjectMapper O(E, P); /// // When returning false, error details were already reported. /// return O && O.map("mandatory_field", R.MandatoryField) && /// O.mapOptional("optional_field", R.OptionalField); /// } /// \endcode class ObjectMapper { … }; /// Parses the provided JSON source, or returns a ParseError. /// The returned Value is self-contained and owns its strings (they do not refer /// to the original source). llvm::Expected<Value> parse(llvm::StringRef JSON); class ParseError : public llvm::ErrorInfo<ParseError> { … }; /// Version of parse() that converts the parsed value to the type T. /// RootName describes the root object and is used in error messages. template <typename T> Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") { … } /// json::OStream allows writing well-formed JSON without materializing /// all structures as json::Value ahead of time. /// It's faster, lower-level, and less safe than OS << json::Value. /// It also allows emitting more constructs, such as comments. /// /// Only one "top-level" object can be written to a stream. /// Simplest usage involves passing lambdas (Blocks) to fill in containers: /// /// json::OStream J(OS); /// J.array([&]{ /// for (const Event &E : Events) /// J.object([&] { /// J.attribute("timestamp", int64_t(E.Time)); /// J.attributeArray("participants", [&] { /// for (const Participant &P : E.Participants) /// J.value(P.toString()); /// }); /// }); /// }); /// /// This would produce JSON like: /// /// [ /// { /// "timestamp": 19287398741, /// "participants": [ /// "King Kong", /// "Miley Cyrus", /// "Cleopatra" /// ] /// }, /// ... 
/// ] /// /// The lower level begin/end methods (arrayBegin()) are more flexible but /// care must be taken to pair them correctly: /// /// json::OStream J(OS); /// J.arrayBegin(); /// for (const Event &E : Events) { /// J.objectBegin(); /// J.attribute("timestamp", int64_t(E.Time)); /// J.attributeBegin("participants"); /// for (const Participant &P : E.Participants) /// J.value(P.toString()); /// J.attributeEnd(); /// J.objectEnd(); /// } /// J.arrayEnd(); /// /// If the call sequence isn't valid JSON, asserts will fire in debug mode. /// This can be mismatched begin()/end() pairs, trying to emit attributes inside /// an array, and so on. /// With asserts disabled, this is undefined behavior. class OStream { … }; /// Serializes this Value to JSON, writing it to the provided stream. /// The formatting is compact (no extra whitespace) and deterministic. /// For pretty-printing, use the formatv() format_provider below. inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { … } } // namespace json /// Allow printing json::Value with formatv(). /// The default style is basic/compact formatting, like operator<<. /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2. template <> struct format_provider<llvm::json::Value> { … }; } // namespace llvm #endif