#include "core/fpdftext/cpdf_textpage.h"
#include <math.h>
#include <stdint.h>
#include <algorithm>
#include <array>
#include <utility>
#include <vector>
#include "core/fpdfapi/font/cpdf_cidfont.h"
#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fpdfapi/page/cpdf_form.h"
#include "core/fpdfapi/page/cpdf_formobject.h"
#include "core/fpdfapi/page/cpdf_page.h"
#include "core/fpdfapi/page/cpdf_pageobject.h"
#include "core/fpdfapi/page/cpdf_textobject.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdftext/unicodenormalizationdata.h"
#include "core/fxcrt/check.h"
#include "core/fxcrt/check_op.h"
#include "core/fxcrt/compiler_specific.h"
#include "core/fxcrt/data_vector.h"
#include "core/fxcrt/fx_bidi.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_unicode.h"
#include "core/fxcrt/span.h"
#include "core/fxcrt/stl_util.h"
// File-local constants and free helper functions. Everything in this
// namespace has internal linkage.
namespace {
// Font size constant.
// NOTE(review): presumably the fallback size used when a text object does not
// supply a usable one -- confirm against its uses.
constexpr float kDefaultFontSize = …;
// Tolerance constant for float size comparisons.
// NOTE(review): presumably used when comparing font or glyph sizes -- confirm.
constexpr float kSizeEpsilon = …;
// Fixed table of three read-only spans of 16-bit values.
// NOTE(review): presumably indexes the tables declared in
// unicodenormalizationdata.h by expansion length -- confirm.
constexpr std::array<pdfium::span<const uint16_t>, 3>
kUnicodeDataNormalizationMaps = …;
// Returns a float derived from `threshold` and the three integer parameters.
// NOTE(review): presumably t1/t2/t3 are ascending cut-off points that select
// how `threshold` is scaled -- confirm ordering requirements.
float NormalizeThreshold(float threshold, int t1, int t2, int t3) { … }
// Computes a float "base space" value for `pTextObj` under `matrix`.
// NOTE(review): presumably derived from the object's character spacing and
// kerning; confirm whether `pTextObj` may be null.
float CalculateBaseSpace(const CPDF_TextObject* pTextObj,
const CFX_Matrix& matrix) { … }
// Returns a sequence of wide characters for `wch`.
// NOTE(review): presumably the normalized/decomposed form of `wch`, possibly
// empty when no mapping exists -- confirm.
DataVector<wchar_t> GetUnicodeNormalization(wchar_t wch) { … }
// Returns a float computed from `mask` over the index range `start`..`end`.
// NOTE(review): presumably the fraction of set entries; confirm whether `end`
// is inclusive or exclusive and what happens for an empty range.
float MaskPercentFilled(const std::vector<bool>& mask,
int32_t start,
int32_t end) { … }
// Predicate on a single character record.
// NOTE(review): presumably tests the record's Unicode value against a set of
// control code points -- confirm which ones.
bool IsControlChar(const CPDF_TextPage::CharInfo& char_info) { … }
// Predicate on a wide character.
// NOTE(review): presumably true for hyphen-like code points -- confirm the
// exact set.
bool IsHyphenCode(wchar_t c) { … }
// Predicate on two rectangles.
// NOTE(review): presumably true when they overlap; confirm how touching edges
// and empty rectangles are treated.
bool IsRectIntersect(const CFX_FloatRect& rect1, const CFX_FloatRect& rect2) { … }
// Predicate on a text object and its font.
// NOTE(review): presumably true when the object's text reads right-to-left
// -- confirm how that is determined.
bool IsRightToLeft(const CPDF_TextObject& text_obj, const CPDF_Font& font) { … }
// Returns an integer width for `charCode` in `pFont`.
// NOTE(review): units and any fallback for missing widths need confirming;
// `pFont` is taken as a non-const pointer.
int GetCharWidth(uint32_t charCode, CPDF_Font* pFont) { … }
// Decides, from a position, the previous position, two widths and a
// threshold, whether something should be generated.
// NOTE(review): presumably whether the gap between consecutive characters is
// wide enough to insert a synthetic space -- confirm.
bool GenerateSpace(const CFX_PointF& pos,
float last_pos,
float this_width,
float last_width,
float threshold) { … }
// Predicate comparing the current rectangle with the previous one.
// NOTE(review): presumably true when `this_rect` starts a new horizontal
// line -- confirm the overlap criteria.
bool EndHorizontalLine(const CFX_FloatRect& this_rect,
const CFX_FloatRect& prev_rect) { … }
// Predicate comparing the current rectangle with the previous one, also
// taking the current line's rectangle and both font sizes.
// NOTE(review): presumably the vertical-text counterpart of
// EndHorizontalLine() -- confirm.
bool EndVerticalLine(const CFX_FloatRect& this_rect,
const CFX_FloatRect& prev_rect,
const CFX_FloatRect& curline_rect,
float this_fontsize,
float prev_fontsize) { … }
// Returns a matrix associated with `pPage`.
// NOTE(review): presumably the page's display matrix for its own size with no
// rotation -- confirm.
CFX_Matrix GetPageMatrix(const CPDF_Page* pPage) { … }
// Returns a font size for `text_object`.
// NOTE(review): presumably falls back to kDefaultFontSize when `text_object`
// is null -- confirm.
float GetFontSize(const CPDF_TextObject* text_object) { … }
// Returns a rectangle computed from a character record.
// NOTE(review): presumably a "loose" box based on font metrics rather than
// the tight glyph outline -- confirm.
CFX_FloatRect GetLooseBounds(const CPDF_TextPage::CharInfo& charinfo) { … }
}  // namespace
// Special member functions of TransformedTextObject. All three are
// compiler-generated (default construction, member-wise copy, destruction)
// and are defined out of line in this translation unit.
CPDF_TextPage::TransformedTextObject::TransformedTextObject() = default;
CPDF_TextPage::TransformedTextObject::TransformedTextObject(
const TransformedTextObject& that) = default;
CPDF_TextPage::TransformedTextObject::~TransformedTextObject() = default;
// Special member functions of CharInfo. All three are compiler-generated
// (default construction, member-wise copy, destruction) and are defined out
// of line in this translation unit.
CPDF_TextPage::CharInfo::CharInfo() = default;
CPDF_TextPage::CharInfo::CharInfo(const CharInfo&) = default;
CPDF_TextPage::CharInfo::~CharInfo() = default;
// Constructs a text page for `pPage`.
// NOTE(review): `rtl` presumably selects right-to-left handling of the
// extracted text -- confirm how the initializer list and body consume it, and
// whether `pPage` must outlive this object.
CPDF_TextPage::CPDF_TextPage(const CPDF_Page* pPage, bool rtl)
: … { … }
// Compiler-generated destructor, defined out of line in this translation
// unit.
CPDF_TextPage::~CPDF_TextPage() = default;
// Initialization step that takes no arguments and returns nothing.
// NOTE(review): presumably walks the page content and builds the character
// list and text buffer -- confirm, and confirm when it is invoked relative to
// the constructor.
void CPDF_TextPage::Init() { … }
// Returns a character count as an int; does not modify the object.
// NOTE(review): presumably the number of entries in the character list
// -- confirm.
int CPDF_TextPage::CountChars() const { … }
// Maps `text_index` to a char index.
// NOTE(review): presumably text indices address the extracted text buffer and
// char indices address the per-character list; confirm the value returned for
// an out-of-range `text_index`.
int CPDF_TextPage::CharIndexFromTextIndex(int text_index) const { … }
// Maps `char_index` to a text index; the inverse direction of
// CharIndexFromTextIndex().
// NOTE(review): confirm the value returned for an out-of-range `char_index`.
int CPDF_TextPage::TextIndexFromCharIndex(int char_index) const { … }
// Returns a list of rectangles for the range described by `start` and
// `count`.
// NOTE(review): presumably one rectangle per run of adjacent characters;
// confirm how negative or out-of-range arguments are handled.
std::vector<CFX_FloatRect> CPDF_TextPage::GetRectArray(int start,
int count) const { … }
// Returns an index for the character at or near `point`, using `tolerance`
// as the allowed slack.
// NOTE(review): confirm the value returned when nothing is hit, and how ties
// between nearby characters are resolved.
int CPDF_TextPage::GetIndexAtPos(const CFX_PointF& point,
const CFX_SizeF& tolerance) const { … }
// Returns text selected with the help of `predicate`, which is called with a
// character record and yields a bool.
// NOTE(review): presumably concatenates characters for which `predicate` is
// true; confirm how separators between non-adjacent matches are produced.
WideString CPDF_TextPage::GetTextByPredicate(
const std::function<bool(const CharInfo&)>& predicate) const { … }
// Returns text associated with `rect`.
// NOTE(review): presumably the characters positioned inside `rect` -- confirm
// the containment test used.
WideString CPDF_TextPage::GetTextByRect(const CFX_FloatRect& rect) const { … }
// Returns text associated with `pTextObj`.
// NOTE(review): presumably the characters that originated from that text
// object -- confirm.
WideString CPDF_TextPage::GetTextByObject(
const CPDF_TextObject* pTextObj) const { … }
// Read-only accessor: returns a const reference to the character record at
// `index`.
// NOTE(review): confirm whether `index` is bounds-checked.
const CPDF_TextPage::CharInfo& CPDF_TextPage::GetCharInfo(size_t index) const { … }
// Mutable accessor: returns a non-const reference to the character record at
// `index`.
// NOTE(review): confirm whether `index` is bounds-checked.
CPDF_TextPage::CharInfo& CPDF_TextPage::GetCharInfo(size_t index) { … }
// Returns a font size for the character at `index`.
// NOTE(review): confirm bounds handling for `index` and what is returned for
// characters without an associated text object.
float CPDF_TextPage::GetCharFontSize(size_t index) const { … }
// Returns a rectangle for the character at `index`.
// NOTE(review): presumably the "loose" bounds of that character -- confirm
// bounds handling for `index`.
CFX_FloatRect CPDF_TextPage::GetCharLooseBounds(size_t index) const { … }
// Returns text for the range described by `start` and `count`.
// NOTE(review): confirm whether the arguments are char indices or text
// indices, and how out-of-range or negative values are treated.
WideString CPDF_TextPage::GetPageText(int start, int count) const { … }
// Returns a rectangle count for the range described by `start` and `nCount`.
// Unlike most accessors in this file, this method is not const.
// NOTE(review): presumably it stores the computed rectangles for later
// retrieval through GetRect() -- confirm.
int CPDF_TextPage::CountRects(int start, int nCount) { … }
// Retrieves the rectangle at `rectIndex` through the out-parameter `pRect`
// and reports success as a bool.
// NOTE(review): presumably reads the rectangles from a preceding
// CountRects() call -- confirm, and confirm whether `*pRect` is written on
// failure.
bool CPDF_TextPage::GetRect(int rectIndex, CFX_FloatRect* pRect) const { … }
// Returns a TextOrientation value; does not modify the object.
// NOTE(review): presumably estimates whether the page's text lines run
// horizontally or vertically -- confirm the heuristic.
CPDF_TextPage::TextOrientation CPDF_TextPage::FindTextlineFlowOrientation()
const { … }
// Appends a generated character with code `unicode`, given `formMatrix`.
// NOTE(review): presumably used for synthetic characters (such as spaces or
// line breaks) that are not present in the page content -- confirm.
void CPDF_TextPage::AppendGeneratedCharacter(wchar_t unicode,
const CFX_Matrix& formMatrix) { … }
// Processing step that takes no arguments and returns nothing.
// NOTE(review): presumably iterates the page's objects and dispatches text
// and form objects to their handlers -- confirm.
void CPDF_TextPage::ProcessObject() { … }
// Processes the form object `pFormObj` under the transform `formMatrix`.
// NOTE(review): presumably recurses into the form's own objects with a
// combined matrix -- confirm.
void CPDF_TextPage::ProcessFormObject(CPDF_FormObject* pFormObj,
const CFX_Matrix& formMatrix) { … }
// Adds the character `wChar` with its record `info`; the left-to-right
// counterpart of AddCharInfoByRLDirection().
// NOTE(review): confirm how this differs from the RL variant (e.g. ordering
// of normalized expansions).
void CPDF_TextPage::AddCharInfoByLRDirection(wchar_t wChar,
const CharInfo& info) { … }
// Adds the character `wChar` with its record `info`; the right-to-left
// counterpart of AddCharInfoByLRDirection().
// NOTE(review): confirm how this differs from the LR variant (e.g. mirroring
// or reversed expansion order).
void CPDF_TextPage::AddCharInfoByRLDirection(wchar_t wChar,
const CharInfo& info) { … }
// Finishes the current temporary line; takes no arguments and returns
// nothing.
// NOTE(review): presumably flushes temporary per-line buffers into the final
// text/character lists, applying bidi ordering -- confirm.
void CPDF_TextPage::CloseTempLine() { … }
// Overload taking a raw text object, its form transform, the holder it came
// from, and an iterator into that holder.
// NOTE(review): presumably `ObjPos` is the position of `pTextObj` inside
// `pObjList`, and the object is queued for later handling by the
// TransformedTextObject overload -- confirm.
void CPDF_TextPage::ProcessTextObject(
CPDF_TextObject* pTextObj,
const CFX_Matrix& formMatrix,
const CPDF_PageObjectHolder* pObjList,
CPDF_PageObjectHolder::const_iterator ObjPos) { … }
// Returns a MarkedContentState for `pTextObj`. This method is not const.
// NOTE(review): presumably inspects the object's marked-content entries
// before normal processing -- confirm what each state means.
CPDF_TextPage::MarkedContentState CPDF_TextPage::PreMarkedContent(
const CPDF_TextObject* pTextObj) { … }
// Processes `obj` along the marked-content path.
// NOTE(review): presumably emits replacement text taken from the object's
// marked-content data -- confirm.
void CPDF_TextPage::ProcessMarkedContent(const TransformedTextObject& obj) { … }
// Takes no arguments and returns nothing, so any result must be stored in
// member state.
// NOTE(review): presumably locates the text object of the most recently
// added character and records it as the "previous" object -- confirm.
void CPDF_TextPage::FindPreviousTextObject() { … }
// Operates on the temporary buffers starting at the two given offsets.
// NOTE(review): presumably reverses the tail of the temporary character list
// (from `iCharListStartAppend`) and text buffer (from `iBufStartAppend`)
// -- confirm.
void CPDF_TextPage::SwapTempTextBuf(size_t iCharListStartAppend,
size_t iBufStartAppend) { … }
// Overload taking an already-transformed text object.
// NOTE(review): presumably performs the per-character extraction for `obj`
// -- confirm.
void CPDF_TextPage::ProcessTextObject(const TransformedTextObject& obj) { … }
// Returns a TextOrientation value for `pTextObj`; does not modify the object.
// NOTE(review): presumably derived from the geometry of the object's first
// and last characters -- confirm.
CPDF_TextPage::TextOrientation CPDF_TextPage::GetTextObjectWritingMode(
const CPDF_TextObject* pTextObj) const { … }
// Predicate on `curChar` that may also consult member state (it is a const
// member rather than a free function).
// NOTE(review): presumably decides whether `curChar` acts as a line-ending
// hyphen given the preceding text -- confirm.
bool CPDF_TextPage::IsHyphen(wchar_t curChar) const { … }
// Returns a pointer to a character record, read-only.
// NOTE(review): presumably the most recently added character, with nullptr
// when there is none -- confirm.
const CPDF_TextPage::CharInfo* CPDF_TextPage::GetPrevCharInfo() const { … }
// Returns a GenerateCharacter value for `pObj` under `formMatrix`. This
// method is not const.
// NOTE(review): presumably decides which synthetic character (if any) should
// be inserted before `pObj`'s text -- confirm the meaning of each enumerator.
CPDF_TextPage::GenerateCharacter CPDF_TextPage::ProcessInsertObject(
const CPDF_TextObject* pObj,
const CFX_Matrix& formMatrix) { … }
// Predicate comparing two text objects.
// NOTE(review): presumably true when the two objects are duplicates of each
// other (same content at nearly the same place) -- confirm the criteria and
// whether either pointer may be null.
bool CPDF_TextPage::IsSameTextObject(CPDF_TextObject* pTextObj1,
CPDF_TextObject* pTextObj2) const { … }
// Predicate taking a text object, the holder it belongs to, and an iterator
// into that holder.
// NOTE(review): presumably scans objects preceding `iter` in `pObjList` for
// one that IsSameTextObject() reports as matching `pTextObj` -- confirm how
// far back it looks.
bool CPDF_TextPage::IsSameAsPreTextObject(
CPDF_TextObject* pTextObj,
const CPDF_PageObjectHolder* pObjList,
CPDF_PageObjectHolder::const_iterator iter) const { … }
// Builds a character record for `unicode`, or returns an empty optional.
// This method is not const.
// NOTE(review): presumably the record is positioned relative to the previous
// character and the empty result means there is no previous character to
// anchor it -- confirm.
std::optional<CPDF_TextPage::CharInfo> CPDF_TextPage::GenerateCharInfo(
wchar_t unicode) { … }