chromium/third_party/rust/serde_json_lenient/v0_2/wrapper/lib.rs

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

mod visitor;

use crate::visitor::ValueVisitor;

use serde::de::Deserializer;
use serde_json_lenient::de::SliceRead;
use std::pin::Pin;

/// UTF8 byte order mark.
const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];

/// C++ bindings.
///
/// Declares the C++ callback interface that the decoder calls into, the Rust
/// entry point exported to C++, and the plain-data structs shared by both
/// sides.
#[cxx::bridge(namespace=serde_json_lenient)]
mod ffi {
    // From the `wrapper_functions` target.
    unsafe extern "C++" {
        include!("third_party/rust/serde_json_lenient/v0_2/wrapper/functions.h");

        // Opaque C++ type: Rust only ever holds pinned references to it and
        // hands them back to the callbacks below.
        type ContextPointer;

        // Opaque C++ type whose member functions are the callbacks below.
        type Functions;

        // `list_append_*`: one callback per kind of JSON value, each taking
        // the context to operate on. Going by the names, they append the value
        // to a list context; the implementations live in functions.h.
        fn list_append_none(self: &Functions, ctx: Pin<&mut ContextPointer>);
        fn list_append_bool(self: &Functions, ctx: Pin<&mut ContextPointer>, val: bool);
        fn list_append_i32(self: &Functions, ctx: Pin<&mut ContextPointer>, val: i32);
        fn list_append_f64(self: &Functions, ctx: Pin<&mut ContextPointer>, val: f64);
        fn list_append_str(self: &Functions, ctx: Pin<&mut ContextPointer>, val: &str);
        // The two aggregate variants return a context that borrows for the
        // same lifetime `'a` as the context passed in (presumably the newly
        // created child container; confirm against functions.h).
        fn list_append_list<'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            reserve: usize,
        ) -> Pin<&'a mut ContextPointer>;
        fn list_append_dict<'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
        ) -> Pin<&'a mut ContextPointer>;

        // `dict_set_*`: the same set of callbacks with an additional `key`
        // argument. Going by the names, they store the value under `key` in a
        // dictionary context.
        fn dict_set_none(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str);
        fn dict_set_bool(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: bool);
        fn dict_set_i32(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: i32);
        fn dict_set_f64(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: f64);
        fn dict_set_str(self: &Functions, ctx: Pin<&mut ContextPointer>, key: &str, val: &str);
        // `'f` is the lifetime of `key` only; the returned context is tied to
        // `'a`, the lifetime of the context passed in, not to the key.
        fn dict_set_list<'f, 'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            key: &'f str,
            reserve: usize,
        ) -> Pin<&'a mut ContextPointer>;
        fn dict_set_dict<'f, 'a>(
            self: &Functions,
            ctx: Pin<&'a mut ContextPointer>,
            key: &'f str,
        ) -> Pin<&'a mut ContextPointer>;
    }

    // Rust functions exported to C++. The signature here must stay in sync
    // with the `decode_json` definition in this file.
    extern "Rust" {
        fn decode_json(
            json: &[u8],
            options: &JsonOptions,
            functions: &'static Functions,
            ctx: Pin<&mut ContextPointer>,
            error: Pin<&mut DecodeError>,
        ) -> bool;
    }

    /// Details of a failed decode, filled in by `decode_json` when it returns
    /// `false`.
    struct DecodeError {
        /// Line of the error as reported by the parser, or -1 if the value
        /// does not fit in an `i32`.
        line: i32,
        /// Column of the error as reported by the parser, or -1 if the value
        /// does not fit in an `i32`.
        column: i32,
        /// The parser's error rendered as text.
        message: String,
    }

    /// Options for parsing JSON inputs. A mirror of the C++
    /// `base::JSONParserOptions` bitflags, represented as a friendlier
    /// struct-of-bools instead, and with additional fields.
    struct JsonOptions {
        /// Allows commas to exist after the last element in structures.
        allow_trailing_commas: bool,
        /// If set the parser replaces invalid code points (i.e. lone
        /// surrogates) with the Unicode replacement character (U+FFFD).
        /// If not set, invalid code points trigger a hard error and
        /// parsing fails.
        replace_invalid_characters: bool,
        /// Allows both C (/* */) and C++ (//) style comments.
        allow_comments: bool,
        /// Permits unescaped \r and \n in strings. This is a subset of what
        /// allow_control_chars allows.
        allow_newlines: bool,
        /// Permits unescaped ASCII control characters (such as unescaped \b,
        /// \r, or \n) in the range [0x00,0x1F].
        allow_control_chars: bool,
        /// Permits \\v vertical tab escapes.
        allow_vert_tab: bool,
        /// Permits \\xNN escapes.
        allow_x_escapes: bool,

        /// The maximum recursion depth to walk while parsing nested JSON
        /// objects. JSON beyond the specified depth will be ignored.
        max_depth: usize,
    }
}

// Public aliases for the bridge types so that code outside the `ffi` module
// can name them without going through `ffi::`.

/// Error details written by [`decode_json`] when decoding fails.
pub type DecodeError = ffi::DecodeError;
/// Parser options accepted by [`decode_json`].
pub type JsonOptions = ffi::JsonOptions;
/// The C++ callback table invoked while decoding.
pub type Functions = ffi::Functions;
/// Opaque C++ context handed to the callbacks in [`Functions`].
pub type ContextPointer = ffi::ContextPointer;

/// Decode a JSON input from `json` and call back out to functions defined in
/// `options` when visiting each node in order for the caller to construct an
/// output.
///
/// The first item visited will be appened to the `ctx` as if the `ctx` were a
/// list. This means the `ContextPointer` in `ctx` must already be a list
/// aggregate type, unless the caller has extra logic to handle the first
/// element visited.
///
/// The `error` is only written to when there is an error decoding and `false`
/// is returned.
///
/// # Returns
///
/// Whether the decode succeeded.
pub fn decode_json(
    json: &[u8],
    options: &JsonOptions,
    functions: &'static Functions,
    // TODO(danakj): Use std::ptr::NonNull when the binding generator supports it.
    ctx: Pin<&mut ContextPointer>,
    // TODO(danakj): Return `Result<(), DecodeError>` once the binding generator supports it.
    mut error: Pin<&mut DecodeError>,
) -> bool {
    let mut deserializer = serde_json_lenient::Deserializer::new(SliceRead::new(
        if json.starts_with(&UTF8_BOM) { &json[3..] } else { json },
        options.replace_invalid_characters,

        // On the C++ side, allow_control_chars means "allow all control chars,
        // including \r and \n", while in serde_json_lenient,
        // allow_control_chars means "allow all controls chars, except \r and
        // \n". To give the behavior that C++ client code is expecting, enable
        // allow_newlines as well when allow_control_chars is supplied.
        options.allow_newlines || options.allow_control_chars,
        options.allow_control_chars,
        options.allow_vert_tab,
        options.allow_x_escapes,
    ));
    deserializer.set_ignore_trailing_commas(options.allow_trailing_commas);
    deserializer.set_allow_comments(options.allow_comments);

    // We track recursion depth ourselves to limit it to `max_depth` option.
    deserializer.disable_recursion_limit();

    // The first element visited will be treated as if being appended to a list, as
    // is specified in the contract of `decode_json()`.
    //
    // SAFETY: We have only a single ContextPointer around at a time, so this
    // reference will not alias. The lifetime of the ContextPointer exceeds this
    // function's lifetime, so we are okay to tie it to the `target`'s lifetime
    // which is shorter.
    //
    // Dereferencing the ContextPointer in C++ would be Undefined Behaviour since
    // it's not a similar type to the actual type it's pointing to, but Rust
    // allows us to make a reference to it regardless.
    let target = visitor::DeserializationTarget::List { ctx };

    let result =
        deserializer.deserialize_any(ValueVisitor::new(&functions, target, options.max_depth));
    match result.and(deserializer.end()) {
        Ok(()) => true,
        Err(err) => {
            error.as_mut().line = err.line().try_into().unwrap_or(-1);
            error.as_mut().column = err.column().try_into().unwrap_or(-1);
            error.as_mut().message.clear();
            // The following line pulls in a lot of binary bloat, due to all the formatter
            // implementations required to stringify error messages. This error message is
            // used in only a couple of places outside unit tests so we could
            // consider trying to eliminate.
            error.as_mut().message.push_str(&err.to_string());
            false
        }
    }
}