#ifdef UNSAFE_BUFFERS_BUILD
#pragma allow_unsafe_buffers
#endif
#include "third_party/blink/renderer/core/html/parser/html_document_parser_fastpath.h"
#include <algorithm>
#include <iostream>
#include <type_traits>
#include "base/metrics/histogram_functions.h"
#include "base/timer/elapsed_timer.h"
#include "base/trace_event/trace_event.h"
#include "third_party/blink/renderer/core/dom/attribute.h"
#include "third_party/blink/renderer/core/dom/document_fragment.h"
#include "third_party/blink/renderer/core/dom/element.h"
#include "third_party/blink/renderer/core/dom/qualified_name.h"
#include "third_party/blink/renderer/core/dom/text.h"
#include "third_party/blink/renderer/core/html/forms/html_button_element.h"
#include "third_party/blink/renderer/core/html/forms/html_form_element.h"
#include "third_party/blink/renderer/core/html/forms/html_input_element.h"
#include "third_party/blink/renderer/core/html/forms/html_label_element.h"
#include "third_party/blink/renderer/core/html/forms/html_option_element.h"
#include "third_party/blink/renderer/core/html/forms/html_select_element.h"
#include "third_party/blink/renderer/core/html/html_anchor_element.h"
#include "third_party/blink/renderer/core/html/html_br_element.h"
#include "third_party/blink/renderer/core/html/html_div_element.h"
#include "third_party/blink/renderer/core/html/html_element.h"
#include "third_party/blink/renderer/core/html/html_image_element.h"
#include "third_party/blink/renderer/core/html/html_li_element.h"
#include "third_party/blink/renderer/core/html/html_olist_element.h"
#include "third_party/blink/renderer/core/html/html_paragraph_element.h"
#include "third_party/blink/renderer/core/html/html_span_element.h"
#include "third_party/blink/renderer/core/html/html_template_element.h"
#include "third_party/blink/renderer/core/html/html_ulist_element.h"
#include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
#include "third_party/blink/renderer/core/html/parser/html_construction_site.h"
#include "third_party/blink/renderer/core/html/parser/html_entity_parser.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/svg/svg_element.h"
#include "third_party/blink/renderer/core/svg_names.h"
#include "third_party/blink/renderer/platform/heap/garbage_collected.h"
#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/text/segmented_string.h"
#include "third_party/blink/renderer/platform/wtf/text/atomic_string_encoding.h"
#include "third_party/blink/renderer/platform/wtf/text/wtf_uchar.h"
#if defined(BLINK_ENABLE_VECTORIZED_HTML_SCANNING)
#include "third_party/highway/src/hwy/highway.h"
#define VECTORIZE_SCANNING …
#else
#define VECTORIZE_SCANNING …
#endif
namespace blink {
namespace {
#if VECTORIZE_SCANNING
// SimdAdvanceAndLookup() DCHECKs that the range it is handed holds at least
// this many characters.
constexpr size_t kVectorizationThreshold = 64;

// Sentinel byte meaning "nothing matched". It is the default value of
// MatchedCharacter::found_character and the value SimdAdvanceAndLookup()
// returns when the scan reaches the end of its range.
constexpr uint8_t kNeverMatchedChar = 0xff;

// Outcome of testing one vector of input bytes against the lookup table.
struct MatchedCharacter {
  // Lane of the first matching byte. Only meaningful when Matched() is true.
  size_t index_in_vector = 0;
  // The byte found in that lane, or kNeverMatchedChar when nothing matched.
  uint8_t found_character = kNeverMatchedChar;

  // True unless `found_character` still holds the sentinel value.
  bool Matched() const {
    const bool holds_sentinel = found_character == kNeverMatchedChar;
    return !holds_sentinel;
  }
};
// Tests every byte lane of `input` against `low_nibble_table`.
//
// Each lane is masked with `low_nib_and_mask` (the callers in this file pass a
// vector of 0xf, i.e. the low nibble) and the result is used as a byte index
// into `low_nibble_table`. A lane matches when the table entry found that way
// equals the full input byte of the lane.
//
// Returns the lane index and byte value of the first matching lane, or a
// default-constructed MatchedCharacter (Matched() == false) when no lane
// matches.
template <typename D, typename VectorT>
  requires(sizeof(hwy::HWY_NAMESPACE::TFromD<D>) == 1)
HWY_ATTR ALWAYS_INLINE MatchedCharacter TryMatch(D tag,
                                                 VectorT input,
                                                 VectorT low_nibble_table,
                                                 VectorT low_nib_and_mask) {
  namespace hw = hwy::HWY_NAMESPACE;

  const auto table_indices = hw::And(input, low_nib_and_mask);
  const auto table_entries =
      hw::TableLookupBytes(low_nibble_table, table_indices);
  const auto lane_matches = hw::Eq(table_entries, input);

  // FindFirstTrue() reports -1 when no lane of the mask is set.
  const intptr_t first_match = hw::FindFirstTrue(tag, lane_matches);
  if (first_match == -1) {
    return {};
  }
  return {static_cast<size_t>(first_match),
          hw::ExtractLane(input, first_match)};
}
// Scans the 1-byte-per-character range [start, end), 16 characters per step,
// for the first character accepted by TryMatch() with `low_nibble_table`.
//
// On a match, `start` is moved to the matching character and that character
// is returned. Otherwise `start` is moved to `end` and kNeverMatchedChar is
// returned.
//
// The range must hold at least kVectorizationThreshold characters (DCHECKed).
// That also keeps the final load, which starts one full vector before `end`,
// inside the range.
template <typename T, typename VectorT>
  requires(sizeof(T) == 1)
HWY_ATTR ALWAYS_INLINE uint8_t SimdAdvanceAndLookup(const T*& start,
                                                    const T* end,
                                                    VectorT low_nibble_table) {
  namespace hw = hwy::HWY_NAMESPACE;
  DCHECK_GE(static_cast<size_t>(end - start), kVectorizationThreshold);

  hw::FixedTag<uint8_t, 16> tag;
  // Number of characters consumed per vector, kept signed so it can be
  // compared directly against pointer differences.
  static constexpr ptrdiff_t kStride = hw::MaxLanes(tag);
  const auto low_nib_and_mask = hw::Set(tag, 0xf);

  // Main loop: runs while a whole vector still fits. The bound is expressed as
  // a distance instead of `start + (stride - 1) < end`, because the latter
  // forms a pointer beyond one-past-the-end once fewer than `stride - 1`
  // characters remain, which is undefined behaviour.
  for (; end - start >= kStride; start += kStride) {
    const auto input = hw::LoadU(tag, reinterpret_cast<const uint8_t*>(start));
    const auto result =
        TryMatch(tag, input, low_nibble_table, low_nib_and_mask);
    if (result.Matched()) {
      start += result.index_in_vector;
      return result.found_character;
    }
  }

  // Fewer than one vector is left: re-scan the last full vector of the range.
  // It overlaps characters the main loop already rejected, so the first match
  // it can report lies in the part that has not been scanned yet.
  if (start < end) {
    const T* const last_vector = end - kStride;
    const auto input =
        hw::LoadU(tag, reinterpret_cast<const uint8_t*>(last_vector));
    const auto result =
        TryMatch(tag, input, low_nibble_table, low_nib_and_mask);
    if (result.Matched()) {
      start = last_vector + result.index_in_vector;
      return result.found_character;
    }
    start = end;
  }

  return kNeverMatchedChar;
}
// Overload for 2-byte characters. Scans [start, end), 16 characters per step,
// for the first character whose first byte is accepted by TryMatch() with
// `low_nibble_table` and whose value has no bits set above the low 8
// (`c >> 8 == 0`).
//
// LoadInterleaved2() splits the 32 loaded bytes into even- and odd-indexed
// bytes; only the even-indexed ones are fed to TryMatch(). NOTE(review): this
// presumably relies on a little-endian layout so that the even bytes are the
// low bytes of each character - confirm for the supported targets.
//
// On a match, `start` is moved to the matching character and its low byte is
// returned. Otherwise `start` is moved to `end` and kNeverMatchedChar is
// returned.
//
// The range must hold at least kVectorizationThreshold characters (DCHECKed).
// That also keeps the final load, which starts one full vector before `end`,
// inside the range.
template <typename T, typename VectorT>
  requires(sizeof(T) == 2)
HWY_ATTR ALWAYS_INLINE uint8_t SimdAdvanceAndLookup(const T*& start,
                                                    const T* end,
                                                    VectorT low_nibble_table) {
  namespace hw = hwy::HWY_NAMESPACE;
  DCHECK_GE(static_cast<size_t>(end - start), kVectorizationThreshold);

  hw::FixedTag<uint8_t, 16> tag;
  // Number of characters consumed per vector, kept signed so it can be
  // compared directly against pointer differences.
  static constexpr ptrdiff_t kStride = hw::MaxLanes(tag);
  const auto low_nib_and_mask = hw::Set(tag, 0xf);

  // Main loop: runs while a whole vector of characters still fits. The bound
  // is expressed as a distance instead of `start + (stride - 1) < end`,
  // because the latter forms a pointer beyond one-past-the-end once fewer than
  // `stride - 1` characters remain, which is undefined behaviour.
  while (end - start >= kStride) {
    VectorT first_bytes;
    VectorT second_bytes;  // Loaded but never inspected.
    hw::LoadInterleaved2(tag, reinterpret_cast<const uint8_t*>(start),
                         first_bytes, second_bytes);
    const auto result =
        TryMatch(tag, first_bytes, low_nibble_table, low_nib_and_mask);
    if (!result.Matched()) {
      start += kStride;
      continue;
    }

    const auto index = result.index_in_vector;
    // The byte matched; accept the character only if the rest of it is zero.
    if ((start[index] >> 8) == 0) {
      start += index;
      return result.found_character;
    }
    // False positive: resume scanning right after the rejected character.
    start += index + 1;
  }

  // Fewer than one vector is left: re-scan the last full vector of the range.
  // It overlaps characters the main loop already rejected.
  if (start < end) {
    const T* const last_vector = end - kStride;
    VectorT first_bytes;
    VectorT second_bytes;  // Loaded but never inspected.
    hw::LoadInterleaved2(tag, reinterpret_cast<const uint8_t*>(last_vector),
                         first_bytes, second_bytes);
    while (true) {
      const auto result =
          TryMatch(tag, first_bytes, low_nibble_table, low_nib_and_mask);
      if (!result.Matched()) {
        break;
      }
      const auto index = result.index_in_vector;
      if ((last_vector[index] >> 8) == 0) {
        start = last_vector + index;
        return result.found_character;
      }
      // False positive: overwrite the lane with the sentinel so the next
      // TryMatch() moves on to a later lane.
      // NOTE(review): presumably relies on `low_nibble_table` never mapping
      // to kNeverMatchedChar, otherwise this lane would match again - confirm
      // against the tables built by the callers.
      first_bytes = hw::InsertLane(first_bytes, index, kNeverMatchedChar);
    }
    start = end;
  }

  return kNeverMatchedChar;
}
#endif
template <class Char, size_t n>
bool operator==(base::span<const Char> span, const char (&s)[n]) { … }
template <int n>
constexpr bool OnlyContainsLowercaseASCIILetters(const char (&s)[n]) { … }
template <class Char, size_t n>
bool SpanMatchesLowercase(base::span<const Char> span, const char (&s)[n]) { … }
template <uint32_t n>
constexpr uint32_t TagnameHash(const char (&s)[n]) { … }
template <class Char>
uint32_t TagnameHash(base::span<const Char> s) { … }
uint32_t TagnameHash(const String& s) { … }
#define SUPPORTED_TAGS …
UCharLiteralBufferType;
template <class Char>
struct ScanTextResult { … };
template <>
String ScanTextResult<LChar>::TextToString() const { … }
template <>
String ScanTextResult<UChar>::TextToString() const { … }
template <class Char>
class HTMLFastPathParser { … };
void LogFastPathResult(HtmlFastPathResult result) { … }
bool CanUseFastPath(Document& document,
Element& context_element,
ParserContentPolicy policy,
HTMLFragmentParsingBehaviorSet behavior) { … }
enum class UnsupportedTagType : uint32_t { … };
constexpr uint32_t kAllUnsupportedTags = …;
static_assert …;
#define CHECK_TAG_TYPE(t) …
#define NODE_HAS_TAG_NAME(t) …
UnsupportedTagType UnsupportedTagTypeValueForNode(const Node& node) { … }
const char* kUnsupportedTagTypeCompositeName = …;
const char* kUnsupportedTagTypeMaskNames[] = …;
const char* kUnsupportedContextTagTypeCompositeName = …;
const char* kUnsupportedContextTagTypeMaskNames[] = …;
void LogFastPathUnsupportedTagTypeDetails(uint32_t type_mask,
const char* composite_histogram_name,
const char* mask_histogram_names[]) { … }
template <class Char>
bool TryParsingHTMLFragmentImpl(const base::span<const Char>& source,
Document& document,
ContainerNode& root_node,
Element& context_element,
HTMLFragmentParsingBehaviorSet behavior,
bool* failed_because_unsupported_tag) { … }
}
bool TryParsingHTMLFragment(const String& source,
Document& document,
ContainerNode& parent,
Element& context_element,
ParserContentPolicy policy,
HTMLFragmentParsingBehaviorSet behavior,
bool* failed_because_unsupported_tag) { … }
void LogTagsForUnsupportedTagTypeFailure(DocumentFragment& fragment) { … }
#undef SUPPORTED_TAGS
}