// Copyright 2012 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifdef UNSAFE_BUFFERS_BUILD // TODO(crbug.com/351564777): Remove this and convert code to safer constructs. #pragma allow_unsafe_buffers #endif #include "extensions/browser/api/web_request/form_data_parser.h" #include <stddef.h> #include <string_view> #include <vector> #include "base/check.h" #include "base/lazy_instance.h" #include "base/memory/raw_ptr.h" #include "base/notreached.h" #include "base/strings/escape.h" #include "base/strings/string_util.h" #include "base/types/optional_util.h" #include "base/values.h" #include "net/http/http_request_headers.h" #include "third_party/re2/src/re2/re2.h" RE2; namespace extensions { namespace { const char kContentDisposition[] = …; const size_t kContentDispositionLength = …; // kCharacterPattern is an allowed character in a URL encoding. Definition is // from RFC 1738, end of section 2.2. const char kCharacterPattern[] = …; const char kCRLF[] = …; const char kContentTypeOctetString[] = …; // A wrapper struct for static RE2 objects to be held as LazyInstance. struct Patterns { … }; Patterns::Patterns() : … { … } base::LazyInstance<Patterns>::Leaky g_patterns = …; bool ConsumePrefix(std::string_view* str, std::string_view prefix) { … } } // namespace // Parses URLencoded forms, see // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 . class FormDataParserUrlEncoded : public FormDataParser { … }; // The following class, FormDataParserMultipart, parses forms encoded as // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart // encoding) and 5322 (MIME-headers). // // Implementation details // // The original grammar from RFC 2046 is this, "multipart-body" being the root // non-terminal: // // boundary := 0*69<bchars> bcharsnospace // bchars := bcharsnospace / " " // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / "," // / "-" / "." 
/ "/" / ":" / "=" / "?" // dash-boundary := "--" boundary // multipart-body := [preamble CRLF] // dash-boundary transport-padding CRLF // body-part *encapsulation // close-delimiter transport-padding // [CRLF epilogue] // transport-padding := *LWSP-char // encapsulation := delimiter transport-padding CRLF body-part // delimiter := CRLF dash-boundary // close-delimiter := delimiter "--" // preamble := discard-text // epilogue := discard-text // discard-text := *(*text CRLF) *text // body-part := MIME-part-headers [CRLF *OCTET] // OCTET := <any 0-255 octet value> // // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF, // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the // English alphabet, respectively. // The non-terminal "text" is presumably just any text, excluding line breaks. // The non-terminal "LWSP-char" is not directly defined in the original grammar // but it means "linear whitespace", which is a space or a horizontal tab. // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use // the syntax for "optional fields" from Section 3.6.8 of RFC 5322: // // MIME-part-headers := field-name ":" unstructured CRLF // field-name := 1*ftext // ftext := %d33-57 / ; Printable US-ASCII // %d59-126 ; characters not including ":". // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and // "CRLF<horizontal tab>", which serve for "folding". // // The FormDataParserMultipart class reads the input source and tries to parse it // according to the grammar above, rooted at the "multipart-body" non-terminal. // This happens in stages: // // 1. The optional preamble and the initial dash-boundary with transport padding // and a CRLF are read and ignored. // // 2. Repeatedly each body part is read. The body parts can either serve to // upload a file, or just a string of bytes. // 2.a. 
The headers of that part are searched for the "content-disposition" // header, which contains the name of the value represented by that body // part. If the body-part is for file upload, that header also contains a // filename. // 2.b. The "*OCTET" part of the body part is then read and passed as the value // of the name-value pair for body parts representing a string of bytes. // For body parts for uploading a file the "*OCTET" part is just ignored // and the filename is used for value instead. // // 3. The final close-delimiter and epilogue are read and ignored. // // IMPORTANT NOTE // This parser supports sources split into multiple chunks. Therefore SetSource // can be called multiple times if the source is spread over several chunks. // However, the split may only occur inside a body part, right after the // trailing CRLF of headers. class FormDataParserMultipart : public FormDataParser { … }; FormDataParser::Result::Result() = default; FormDataParser::Result::~Result() = default; void FormDataParser::Result::SetBinaryValue(std::string_view str) { … } void FormDataParser::Result::SetStringValue(std::string str) { … } FormDataParser::~FormDataParser() = default; // static std::unique_ptr<FormDataParser> FormDataParser::Create( const net::HttpRequestHeaders& request_headers) { … } // static std::unique_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader( const std::string* content_type_header) { … } FormDataParser::FormDataParser() = default; FormDataParserUrlEncoded::FormDataParserUrlEncoded() : … { … } FormDataParserUrlEncoded::~FormDataParserUrlEncoded() = default; bool FormDataParserUrlEncoded::AllDataReadOK() { … } bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) { … } bool FormDataParserUrlEncoded::SetSource(std::string_view source) { … } // static bool FormDataParserMultipart::StartsWithPattern(std::string_view input, const RE2& pattern) { … } FormDataParserMultipart::FormDataParserMultipart( const std::string& 
boundary_separator) : … { … } FormDataParserMultipart::~FormDataParserMultipart() = default; bool FormDataParserMultipart::AllDataReadOK() { … } bool FormDataParserMultipart::FinishReadingPart(std::string_view* data) { … } bool FormDataParserMultipart::GetNextNameValue(Result* result) { … } bool FormDataParserMultipart::SetSource(std::string_view source) { … } bool FormDataParserMultipart::TryReadHeader(std::string_view* name, std::string_view* value, bool* value_assigned, bool* value_is_binary) { … } } // namespace extensions