chromium/extensions/browser/api/web_request/form_data_parser.cc

// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/351564777): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

#include "extensions/browser/api/web_request/form_data_parser.h"

#include <stddef.h>

#include <string_view>
#include <vector>

#include "base/check.h"
#include "base/lazy_instance.h"
#include "base/memory/raw_ptr.h"
#include "base/notreached.h"
#include "base/strings/escape.h"
#include "base/strings/string_util.h"
#include "base/types/optional_util.h"
#include "base/values.h"
#include "net/http/http_request_headers.h"
#include "third_party/re2/src/re2/re2.h"

RE2;

namespace extensions {

namespace {

// File-local string and size constants used by the parsers in this file.
// NOTE(review): the initializers are not visible in this view of the file, so
// the notes on individual constants below are assumptions to be confirmed.

// NOTE(review): presumably the "content-disposition" header text that the
// multipart parser searches for in each body part -- confirm.
const char kContentDisposition[] =;
// NOTE(review): presumably the length of kContentDisposition -- confirm.
const size_t kContentDispositionLength =;
// kCharacterPattern is an allowed character in a URL encoding. Definition is
// from RFC 1738, end of section 2.2.
const char kCharacterPattern[] =;
// NOTE(review): presumably the "\r\n" sequence that the multipart grammar in
// this file refers to as CRLF -- confirm.
const char kCRLF[] =;
// NOTE(review): judging by the name, a content-type string for octet data --
// confirm the exact value against the full definition.
const char kContentTypeOctetString[] =;

// A wrapper struct for static RE2 objects to be held as LazyInstance.
// The instance used by this file is the file-local |g_patterns|.
// NOTE(review): the struct's members are not visible in this view.
struct Patterns {};

// Constructor of the Patterns wrapper.
// NOTE(review): the member-initializer list is not visible in this view;
// presumably it constructs the RE2 objects the struct holds -- confirm.
Patterns::Patterns()
    :{}

// File-local holder of the Patterns instance, declared with the Leaky variant
// of base::LazyInstance.
// NOTE(review): the initializer is not visible in this view -- confirm.
base::LazyInstance<Patterns>::Leaky g_patterns =;

// Helper taking a pointer to a string view and a candidate |prefix|; returns
// a bool.
// NOTE(review): the body is empty in this view. Judging by the name and the
// pointer parameter, this presumably removes |prefix| from the front of |*str|
// when present and returns whether it did so -- confirm.
bool ConsumePrefix(std::string_view* str, std::string_view prefix) {}

}  // namespace

// Parses URLencoded forms, see
// http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .
//
// Derives publicly from FormDataParser. Its constructor, destructor,
// AllDataReadOK(), GetNextNameValue() and SetSource() are defined out of line
// later in this file.
// NOTE(review): the class body is not visible in this view.
class FormDataParserUrlEncoded : public FormDataParser {};

// The following class, FormDataParserMultipart, parses forms encoded as
// multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart
// encoding) and 5322 (MIME-headers).
//
// Implementation details
//
// The original grammar from RFC 2046 is this, "multipart-body" being the root
// non-terminal:
//
// boundary := 0*69<bchars> bcharsnospace
// bchars := bcharsnospace / " "
// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / ","
//                  / "-" / "." / "/" / ":" / "=" / "?"
// dash-boundary := "--" boundary
// multipart-body := [preamble CRLF]
//                        dash-boundary transport-padding CRLF
//                        body-part *encapsulation
//                        close-delimiter transport-padding
//                        [CRLF epilogue]
// transport-padding := *LWSP-char
// encapsulation := delimiter transport-padding CRLF body-part
// delimiter := CRLF dash-boundary
// close-delimiter := delimiter "--"
// preamble := discard-text
// epilogue := discard-text
// discard-text := *(*text CRLF) *text
// body-part := MIME-part-headers [CRLF *OCTET]
// OCTET := <any 0-255 octet value>
//
// Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF,
// DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the
// English alphabet, respectively.
// The non-terminal "text" is presumably just any text, excluding line breaks.
// The non-terminal "LWSP-char" is not directly defined in the original grammar
// but it means "linear whitespace", which is a space or a horizontal tab.
// The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use
// the syntax for "optional fields" from Section 3.6.8 of RFC 5322:
//
// MIME-part-headers := field-name ":" unstructured CRLF
// field-name := 1*ftext
// ftext := %d33-57 /          ; Printable US-ASCII
//          %d59-126           ;  characters not including ":".
// Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which
// does not contain a CRLF sub-string, except for substrings "CRLF<space>" and
// "CRLF<horizontal tab>", which serve for "folding".
//
// The FormDataParserMultipart class reads the input source and tries to parse
// it according to the grammar above, rooted at the "multipart-body"
// non-terminal. This happens in stages:
//
// 1. The optional preamble and the initial dash-boundary with transport padding
// and a CRLF are read and ignored.
//
// 2. Repeatedly each body part is read. The body parts can either serve to
//    upload a file, or just a string of bytes.
// 2.a. The headers of that part are searched for the "content-disposition"
//      header, which contains the name of the value represented by that body
//      part. If the body-part is for file upload, that header also contains a
//      filename.
// 2.b. The "*OCTET" part of the body part is then read and passed as the value
//      of the name-value pair for body parts representing a string of bytes.
//      For body parts for uploading a file the "*OCTET" part is just ignored
//      and the filename is used for value instead.
//
// 3. The final close-delimiter and epilogue are read and ignored.
//
// IMPORTANT NOTE
// This parser supports sources split into multiple chunks. Therefore SetSource
// can be called multiple times if the source is spread over several chunks.
// However, the split may only occur inside a body part, right after the
// trailing CRLF of headers.
//
// The class derives publicly from FormDataParser. Defined out of line later in
// this file: the static StartsWithPattern(), the constructor taking the
// boundary separator, the destructor, AllDataReadOK(), FinishReadingPart(),
// GetNextNameValue(), SetSource() and TryReadHeader().
// NOTE(review): the class body is not visible in this view.
class FormDataParserMultipart : public FormDataParser {};

// Result's default constructor and destructor are compiler-generated; they
// are defaulted here, out of line, rather than inside the class definition.
FormDataParser::Result::Result() = default;
FormDataParser::Result::~Result() = default;

// Setter on Result taking a non-owning view |str|.
// NOTE(review): the body is empty in this view. Judging by the name, this
// presumably stores |str| as a binary (non-text) value of the result --
// confirm how the bytes are copied and represented.
void FormDataParser::Result::SetBinaryValue(std::string_view str) {}

// Setter on Result taking |str| by value.
// NOTE(review): the body is empty in this view. Judging by the name, this
// presumably stores |str| as a text value of the result -- confirm.
void FormDataParser::Result::SetStringValue(std::string str) {}

// Defaulted out-of-line destructor of FormDataParser, the base class of the
// two parser classes declared in this file.
FormDataParser::~FormDataParser() = default;

// Factory that takes the request headers and returns an owning pointer to a
// FormDataParser.
// NOTE(review): the body is empty in this view. Presumably it extracts the
// content type from |request_headers| and defers to
// CreateFromContentTypeHeader(); whether it can return null is not visible
// here -- confirm.
// static
std::unique_ptr<FormDataParser> FormDataParser::Create(
    const net::HttpRequestHeaders& request_headers) {}

// Factory that takes a pointer to the content-type header value and returns
// an owning pointer to a FormDataParser.
// NOTE(review): the body is empty in this view. Presumably it chooses between
// FormDataParserUrlEncoded and FormDataParserMultipart based on the header;
// how a null |content_type_header| or an unsupported type is handled is not
// visible here -- confirm.
// static
std::unique_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader(
    const std::string* content_type_header) {}

// Defaulted out-of-line default constructor of the FormDataParser base class.
FormDataParser::FormDataParser() = default;

// Default constructor of the URL-encoded form parser.
// NOTE(review): the member-initializer list is not visible in this view, so
// the initial parser state cannot be documented here.
FormDataParserUrlEncoded::FormDataParserUrlEncoded()
    :{}

// Defaulted out-of-line destructor of the URL-encoded form parser.
FormDataParserUrlEncoded::~FormDataParserUrlEncoded() = default;

// Status query for the URL-encoded parser; takes no arguments and returns a
// bool.
// NOTE(review): the body is empty in this view. Judging by the name, this
// presumably reports whether the whole source was consumed without a parse
// error -- confirm.
bool FormDataParserUrlEncoded::AllDataReadOK() {}

// Takes a pointer to a Result and returns a bool.
// NOTE(review): the body is empty in this view. Presumably this extracts the
// next name-value pair of the URL-encoded form into |*result| and returns
// false when there is none or on error -- confirm.
bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {}

// Takes the data to parse as a non-owning view and returns a bool.
// NOTE(review): the body is empty in this view. Because |source| is a
// std::string_view, the caller presumably must keep the underlying buffer
// alive while the parser reads from it; the meaning of the return value and
// whether repeated calls are allowed are not visible here -- confirm.
bool FormDataParserUrlEncoded::SetSource(std::string_view source) {}

// Static helper taking an |input| view and a compiled RE2 |pattern|; returns
// a bool.
// NOTE(review): the body is empty in this view. Judging by the name, this
// presumably reports whether |pattern| matches at the beginning of |input|
// without consuming it -- confirm.
// static
bool FormDataParserMultipart::StartsWithPattern(std::string_view input,
                                                const RE2& pattern) {}

// Constructs a multipart parser for the given |boundary_separator|.
// NOTE(review): the member-initializer list is not visible in this view.
// |boundary_separator| presumably corresponds to "boundary" in the grammar
// documented in this file (with "--" prepended to form the dash-boundary) --
// confirm.
FormDataParserMultipart::FormDataParserMultipart(
    const std::string& boundary_separator)
    :{}

// Defaulted out-of-line destructor of the multipart form parser.
FormDataParserMultipart::~FormDataParserMultipart() = default;

// Status query for the multipart parser; takes no arguments and returns a
// bool.
// NOTE(review): the body is empty in this view. Judging by the name, this
// presumably reports whether parsing reached the end of the multipart body
// successfully -- confirm.
bool FormDataParserMultipart::AllDataReadOK() {}

// Takes a pointer to a string view and returns a bool.
// NOTE(review): the body is empty in this view. Presumably this consumes the
// "*OCTET" remainder of the current body part up to the next delimiter and,
// when |data| is non-null, reports the consumed bytes through it -- confirm.
bool FormDataParserMultipart::FinishReadingPart(std::string_view* data) {}

// Takes a pointer to a Result and returns a bool.
// Per the class-level description in this file, each body part yields a
// name-value pair: the "*OCTET" content is the value for parts carrying a
// string of bytes, while for file-upload parts the filename is used instead.
// NOTE(review): the body is empty in this view; presumably it writes the next
// such pair into |*result| and returns false when none remains or on a parse
// error -- confirm.
bool FormDataParserMultipart::GetNextNameValue(Result* result) {}

// Takes the next chunk of the data to parse as a non-owning view and returns
// a bool.
// Per the class-level note in this file, SetSource can be called multiple
// times when the source is spread over several chunks, but a split may only
// occur inside a body part, right after the trailing CRLF of headers.
// NOTE(review): the body is empty in this view; the meaning of the return
// value is not visible here -- confirm.
bool FormDataParserMultipart::SetSource(std::string_view source) {}

// Takes four output pointers (|name|, |value|, |value_assigned|,
// |value_is_binary|) and returns a bool.
// Per the class-level description in this file, the headers of a body part are
// searched for "content-disposition", which carries the name of the value and,
// for file uploads, also a filename.
// NOTE(review): the body is empty in this view. Presumably this reads one
// header, sets |*name| when the header provides it, sets |*value| and
// |*value_assigned| when the header provides a filename, and sets
// |*value_is_binary| based on the part's content type; the meaning of the
// return value is not visible here -- confirm.
bool FormDataParserMultipart::TryReadHeader(std::string_view* name,
                                            std::string_view* value,
                                            bool* value_assigned,
                                            bool* value_is_binary) {}

}  // namespace extensions