chromium/third_party/blink/renderer/modules/content_extraction/document_chunker.cc

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "third_party/blink/renderer/modules/content_extraction/document_chunker.h"

#include "third_party/blink/renderer/core/dom/node_traversal.h"
#include "third_party/blink/renderer/core/dom/text.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/html/html_element.h"
#include "third_party/blink/renderer/core/html/html_iframe_element.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/svg/svg_element.h"
#include "third_party/blink/renderer/core/svg_names.h"
#include "third_party/blink/renderer/platform/wtf/text/character_visitor.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"

namespace blink {

namespace {

// Checks for excluded tags. Text within these will be excluded from passages.
//
// Takes the node to test by const reference and returns a bool. The function
// is file-local because it is defined inside the unnamed namespace.
// NOTE(review): the body is empty in this view, so the set of excluded tags
// cannot be confirmed here. Flowing off the end of a non-void function is
// undefined behavior, so verify that the implementation was only elided.
bool IsExcludedElement(const Node& node) {}

// Checks for tags that indicate a section break. Sibling nodes will not be
// greedily aggregated into a chunk across one of these tags.
//
// Takes the node to test by const reference and returns a bool. The function
// is file-local because it is defined inside the unnamed namespace.
// NOTE(review): the body is empty in this view, so the set of section-break
// tags cannot be confirmed here. Flowing off the end of a non-void function is
// undefined behavior, so verify that the implementation was only elided.
bool IsSectionBreak(const Node& node) {}

}  // namespace

// Presumably reports whether content extraction should include
// `iframe_element` (inferred from the name only; the criteria are not visible
// here). Defined at `blink` namespace scope, outside the unnamed namespace, so
// it is not file-local.
// NOTE(review): the body is empty in this view. Flowing off the end of a
// non-void function is undefined behavior, so verify that the implementation
// was only elided.
bool ShouldContentExtractionIncludeIFrame(const HTMLIFrameElement& iframe_element) {}

// Constructs a DocumentChunker from four settings:
// `max_words_per_aggregate_passage`, `greedily_aggregate_sibling_nodes`,
// `max_passages`, and `min_words_per_passage`.
// NOTE(review): the member initializer list after the `:` is empty in this
// view, which is not valid C++ as written. Presumably each parameter
// initializes a corresponding data member; confirm the member names against
// the class declaration in document_chunker.h before restoring the list.
DocumentChunker::DocumentChunker(size_t max_words_per_aggregate_passage,
                                 bool greedily_aggregate_sibling_nodes,
                                 uint32_t max_passages,
                                 uint32_t min_words_per_passage)
    :{}

// Takes the root of a node tree by const reference and returns a
// Vector<String>; presumably each string is one extracted passage (inferred
// from the names, TODO confirm).
// NOTE(review): the body is empty in this view. Flowing off the end of a
// non-void function is undefined behavior, so verify that the implementation
// was only elided.
Vector<String> DocumentChunker::Chunk(const Node& tree) {}

// Processes `node` and returns an AggregateNode by value. `depth` and
// `passage_count` are passed by value; presumably they track the traversal
// depth and the number of passages produced so far (inferred from the names,
// TODO confirm against the header).
// NOTE(review): the body is empty in this view. Flowing off the end of a
// non-void function is undefined behavior, so verify that the implementation
// was only elided.
DocumentChunker::AggregateNode DocumentChunker::ProcessNode(
    const Node& node,
    int depth,
    uint32_t passage_count) {}

// Takes an AggregateNode by const reference together with a
// `min_words_per_passage` threshold and returns nothing. Presumably appends
// the node's passage to this list when it meets the threshold (inferred from
// the names, TODO confirm).
// NOTE(review): the body is empty in this view, so as written this is a no-op.
void DocumentChunker::PassageList::AddPassageForNode(
    const AggregateNode& node,
    size_t min_words_per_passage) {}

// Takes another PassageList by const reference and returns nothing.
// Presumably appends the contents of `passage_list` to this list (inferred
// from the name, TODO confirm).
// NOTE(review): the body is empty in this view, so as written this is a no-op.
void DocumentChunker::PassageList::Extend(const PassageList& passage_list) {}

// Takes another AggregateNode by const reference and a `max_words` limit, and
// returns a bool. Presumably reports whether `node` can be added to this
// aggregate without exceeding `max_words` (inferred from the names, TODO
// confirm). This definition is not const-qualified.
// NOTE(review): the body is empty in this view. Flowing off the end of a
// non-void function is undefined behavior, so verify that the implementation
// was only elided.
bool DocumentChunker::AggregateNode::Fits(const AggregateNode& node,
                                          size_t max_words) {}

// Takes another AggregateNode by const reference and returns nothing.
// Presumably merges `node` into this aggregate (inferred from the name, TODO
// confirm).
// NOTE(review): the body is empty in this view, so as written this is a no-op.
void DocumentChunker::AggregateNode::AddNode(const AggregateNode& node) {}

// Const member that returns a String by value; presumably the passage text
// built from this aggregate (inferred from the name, TODO confirm).
// NOTE(review): the body is empty in this view. Flowing off the end of a
// non-void function is undefined behavior, so verify that the implementation
// was only elided.
String DocumentChunker::AggregateNode::CreatePassage() const {}

}  // namespace blink