// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_font_win.h"
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include "base/check_op.h"
#include "base/containers/contains.h"
#include "base/containers/fixed_flat_map.h"
#include "base/containers/flat_map.h"
#include "base/logging.h"
#include "base/no_destructor.h"
#include "base/sequence_checker.h"
#include "base/strings/string_number_conversions.h"
#include "base/trace_event/trace_event.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_font_helpers.h"
#include "skia/ext/font_utils.h"
#include "third_party/blink/public/platform/web_font_description.h"
#include "third_party/pdfium/public/fpdf_sysfontinfo.h"
#include "third_party/re2/src/re2/re2.h"
#include "third_party/skia/include/core/SkFontMgr.h"
#include "third_party/skia/include/core/SkFontStyle.h"
#include "third_party/skia/include/core/SkStream.h"
#include "third_party/skia/include/core/SkTypeface.h"
namespace chrome_pdf {
namespace {
// Compile-time table mapping PDF standard font names (the Courier, Helvetica
// and Times families, in their regular/bold/italic variants) to the name of
// the font that should be looked up in their place. Keys are matched exactly
// by GetSubstFont().
constexpr auto kBase14Substs =
base::MakeFixedFlatMap<std::string_view, std::string_view>({
// {PDF font name, substitute font name}
{"Courier", "Courier New"},
{"Courier-Bold", "Courier New Bold"},
{"Courier-BoldOblique", "Courier New Bold Italic"},
{"Courier-Oblique", "Courier New Italic"},
{"Helvetica", "Arial"},
{"Helvetica-Bold", "Arial Bold"},
{"Helvetica-BoldOblique", "Arial Bold Italic"},
{"Helvetica-Oblique", "Arial Italic"},
{"Times-Roman", "Times New Roman"},
{"Times-Bold", "Times New Roman Bold"},
{"Times-BoldItalic", "Times New Roman Bold Italic"},
{"Times-Italic", "Times New Roman Italic"},
});
// Returns the substitute font name for `face` when `face` is exactly one of
// the keys of `kBase14Substs` (a table taken from cfx_folderfontinfo).
// Any other name is returned unchanged.
std::string GetSubstFont(const std::string& face) {
  const auto match = kBase14Substs.find(face);
  return match == kBase14Substs.end() ? face : std::string(match->second);
}
// Maps font description and charset to `FontId` as requested by PDFium, with
// `FontId` as an opaque type that PDFium works with. Based on the `FontId`,
// PDFium can read from the font files using GetFontData(). Properly frees the
// underlying resource type when PDFium is done with the mapped font.
class SkiaFontMapper {
public:
// Defined as the type most convenient for use with PDFium's
// `FPDF_SYSFONTINFO` functions.
using FontId = void*;
SkiaFontMapper() { manager_ = skia::DefaultFontMgr(); }
~SkiaFontMapper() = delete;
// Returns a handle to the font mapped based on `desc`, for use
// as the `font_id` in GetFontData() and DeleteFont() below. Returns nullptr
// on failure.
FontId MapFont(int weight,
int italic,
int charset,
int pitch,
const char* face) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
TRACE_EVENT2("fonts", "PdfiumMapFont", "face", std::string(face), "charset",
charset);
auto typeface = MapTypeface(weight, italic, charset, pitch, face);
if (typeface) {
FontId id = reinterpret_cast<FontId>(typeface->uniqueID());
id_to_typeface_.try_emplace(id, std::move(typeface));
return id;
}
LOG(WARNING) << "Failed to lookup face `" << base::HexEncode(face)
<< "` for charset " << charset << ", weight " << weight;
return nullptr;
}
// Releases the font file that `font_id` points to. Note that skia's font
// manager might retain its own cached resources.
void DeleteFont(FontId font_id) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
id_to_typeface_.erase(font_id);
}
// Reads data from the `font_id` handle for `table` into a `buffer` of
// `buf_size`. Returns the amount of data read on success, or 0 on failure.
// If `buffer` is null, then just return the required size for the buffer.
// See content::GetFontTable() for information on the `table_tag` parameter.
unsigned long GetFontData(FontId font_id,
unsigned int table_tag,
unsigned char* buffer,
unsigned long buf_size) const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// This class creates font_id so it will always cast safely to SkTypefaceID.
auto stored_typeface = id_to_typeface_.find(font_id);
if (stored_typeface == id_to_typeface_.end()) {
return 0;
}
sk_sp<SkTypeface> typeface = stored_typeface->second;
// PDFium asks for 0 and 'ttcf' tags. These are not supported by DirectWrite
// backed Skia fonts so this adapter must do something sensible. Return the
// full font data for 0, and skip 'ttcf' allowing getTableSize(ttcf) to
// naturally fail.
if (table_tag == 0) {
std::unique_ptr<SkStreamAsset> stream = typeface->openStream(nullptr);
if (!buffer || buf_size < stream->getLength()) {
return stream->getLength();
}
return stream->read(buffer, buf_size);
}
if (!buffer) {
return typeface->getTableSize(table_tag);
}
return typeface->getTableData(table_tag, /*offset=*/0,
/*size=*/buf_size, buffer);
}
private:
// Lookup a typeface, taking various fallbacks if fonts are not available.
sk_sp<SkTypeface> MapTypeface(int weight,
int italic,
int charset,
int pitch,
const std::string& face) {
// Lookup via skia manager directly.
SkFontStyle style(weight, SkFontStyle::Width::kNormal_Width,
italic > 0 ? SkFontStyle::Slant::kItalic_Slant
: SkFontStyle::Slant::kUpright_Slant);
// Force name substitution for default PDF fonts.
std::string subst_face = GetSubstFont(face);
auto typeface = manager_->matchFamilyStyle(subst_face.c_str(), style);
if (typeface) {
return typeface;
}
// Try pdf->blink mappings, which does its own substitution.
std::optional<blink::WebFontDescription> desc =
PdfFontToBlinkFontMapping(weight, italic, charset, pitch, face.c_str());
if (desc) {
typeface = manager_->matchFamilyStyle(desc->family.Utf8().c_str(), style);
if (typeface) {
return typeface;
}
}
// Nothing was found (e.g. an optional Windows font is not installed),
// then try to map the name to a fallback.
auto fallback = GetFallbackFace(subst_face, charset, weight, italic);
if (fallback) {
typeface = manager_->matchFamilyStyle(fallback->c_str(), style);
if (typeface) {
return typeface;
}
}
// Finally, try some hacks that fix edge cases & mis-spellings.
return FinalFixups(subst_face, style);
}
bool HasFamily(const char* family) {
auto style_set = manager_->matchFamily(family);
bool has_family = style_set->count() > 0;
return has_family;
}
std::optional<std::string> GetShiftJISPreference(const std::string& face,
int weight,
int pitch_family) {
if (base::Contains(face, "Gothic") ||
base::Contains(face, "\x83\x53\x83\x56\x83\x62\x83\x4e")) {
if (base::Contains(face, "UI Gothic")) {
return "MS UI Gothic";
} else if (base::Contains(face, "PGothic") ||
base::Contains(face,
"\x82\x6f\x83\x53\x83\x56\x83\x62\x83\x4e") ||
base::Contains(face, "HGSGothicM") ||
base::Contains(face, "HGMaruGothicMPRO")) {
return "MS PGothic";
}
return "MS Gothic";
}
if (base::Contains(face, "Mincho") ||
base::Contains(face, "\x96\xbe\x92\xa9")) {
if (base::Contains(face, "PMincho") ||
base::Contains(face, "\x82\x6f\x96\xbe\x92\xa9")) {
return std::string(HasFamily("MS PMincho") ? "MS PMincho"
: "MS PGothic");
}
return std::string(HasFamily("MS Mincho") ? "MS Mincho" : "MS Gothic");
}
if (!(pitch_family & FXFONT_FF_ROMAN) && weight > 400) {
return "MS PGothic";
}
return "MS Gothic";
}
std::optional<std::string> GetGBPreference(const std::string& face,
int weight,
int pitch_family) {
// KaiTi and SimHei are Windows supplemental fonts so assume they were not
// found by skia.
if (base::Contains(face, "KaiTi") || base::Contains(face, "\xbf\xac")) {
return "SimSun";
} else if (base::Contains(face, "FangSong") ||
base::Contains(face, "\xb7\xc2\xcb\xce")) {
return "SimSun";
} else if (base::Contains(face, "SimSun") ||
base::Contains(face, "\xcb\xce")) {
return "SimSun";
} else if (base::Contains(face, "SimHei") ||
base::Contains(face, "\xba\xda")) {
return "SimHei";
} else if (!(pitch_family & FXFONT_FF_ROMAN) && weight > 550) {
return "SimHei";
}
return "SimSun";
}
std::optional<std::string> GetHangeulPreference(const std::string& face,
int weight,
int pitch_family) {
// Gulim is a supplemental font.
if (HasFamily("Gulim")) {
return "Gulim";
}
return "Malgun Gothic";
}
std::optional<std::string> GetFallbackFace(const std::string& face,
int charset,
int weight,
int pitch_family) {
switch (charset) {
case FXFONT_SHIFTJIS_CHARSET:
return GetShiftJISPreference(face, weight, pitch_family);
case FXFONT_GB2312_CHARSET:
return GetGBPreference(face, weight, pitch_family);
case FXFONT_HANGEUL_CHARSET:
return GetHangeulPreference(face, weight, pitch_family);
case FXFONT_CHINESEBIG5_CHARSET:
if (base::Contains(face, "MSung")) {
// Monospace.
return "Microsoft YaHei";
}
// Proportional.
return "Microsoft JHengHei";
default:
return std::nullopt;
}
}
// Put any last-gasp hacks into this method.
sk_sp<SkTypeface> FinalFixups(const std::string& face,
const SkFontStyle& style) {
// Some fonts are specified with weights that Skia can't provide.
// pdf.js/tests/issue5801.pdf specifies ArialBlack but a weight of 390.
// Commonly seen patterns: `ArialBlack` `Arial Black` & `Arial-Black`.
if (base::StartsWith(face, "Arial")) {
if (base::EndsWith(face, "Black")) {
SkFontStyle black = SkFontStyle(SkFontStyle::Weight::kBlack_Weight,
style.width(), style.slant());
return manager_->matchFamilyStyle("Arial", black);
}
if (base::EndsWith(face, "Narrow")) {
SkFontStyle narrow = SkFontStyle(SkFontStyle::Weight::kThin_Weight,
style.width(), style.slant());
return manager_->matchFamilyStyle("Arial", narrow);
}
}
// Some fonts are specified without spaces in their name e.g. `ComicSansMS`.
std::string with_spaces(face);
// s/{lower case letter}{uppercase letter}/l u/g.
if (re2::RE2::GlobalReplace(&with_spaces, "(\\p{Ll})(\\p{Lu})", "\\1 \\2") >
0) {
return manager_->matchFamilyStyle(with_spaces.c_str(), style);
}
return nullptr;
}
sk_sp<SkFontMgr> manager_;
base::flat_map<FontId, sk_sp<SkTypeface>> id_to_typeface_;
SEQUENCE_CHECKER(sequence_checker_);
};
// Returns the single SkiaFontMapper shared by all of the PDFium callbacks
// below. The instance is a function-local static wrapped in
// base::NoDestructor, so it is created on first use and never destroyed.
SkiaFontMapper& GetSkiaFontMapper() {
static base::NoDestructor<SkiaFontMapper> mapper;
return *mapper;
}
// PDFium `MapFont` callback: forwards the request to the shared
// SkiaFontMapper and returns its font handle, or nullptr when no font could be
// mapped or when mapping is disabled.
// Note: `exact` is obsolete.
void* MapFont(FPDF_SYSFONTINFO*,
              int weight,
              int italic,
              int charset,
              int pitch,
              const char* face,
              int* exact) {
  const auto mode = PDFiumEngine::GetFontMappingMode();
  if (mode == FontMappingMode::kBlink) {
    SkiaFontMapper& mapper = GetSkiaFontMapper();
    return mapper.MapFont(weight, italic, charset, pitch, face);
  }

  // Exit early if pdfium was specifically configured in kNoMapping mode; no
  // other mode is expected to reach this callback.
  CHECK_EQ(mode, FontMappingMode::kNoMapping);
  return nullptr;
}
// PDFium `GetFontData` callback: reads font data for `font_id` through the
// shared SkiaFontMapper. `table` is the font table tag requested by PDFium.
// Must only be called while font mapping is in kBlink mode, since that is the
// only mode in which MapFont() hands out font handles.
unsigned long GetFontData(FPDF_SYSFONTINFO*,
                          void* font_id,
                          unsigned int table,
                          unsigned char* buffer,
                          unsigned long buf_size) {
  const auto mode = PDFiumEngine::GetFontMappingMode();
  CHECK_EQ(mode, FontMappingMode::kBlink);
  SkiaFontMapper& mapper = GetSkiaFontMapper();
  return mapper.GetFontData(font_id, /*table_tag=*/table, buffer, buf_size);
}
// PDFium `DeleteFont` callback: tells the shared SkiaFontMapper that PDFium is
// done with `font_id`. Must only be called while font mapping is in kBlink
// mode, since that is the only mode in which MapFont() hands out font handles.
void DeleteFont(FPDF_SYSFONTINFO*, void* font_id) {
  const auto mode = PDFiumEngine::GetFontMappingMode();
  CHECK_EQ(mode, FontMappingMode::kBlink);
  SkiaFontMapper& mapper = GetSkiaFontMapper();
  mapper.DeleteFont(font_id);
}
// Callback table handed to PDFium. Only the mapping, data reading and deletion
// callbacks are provided; every other callback is left null.
FPDF_SYSFONTINFO g_font_info = {
    /*version=*/1,
    /*Release=*/nullptr,
    /*EnumFonts=*/nullptr,
    /*MapFont=*/MapFont,
    /*GetFont=*/nullptr,
    /*GetFontData=*/GetFontData,
    /*GetFaceName=*/nullptr,
    /*GetFontCharset=*/nullptr,
    /*DeleteFont=*/DeleteFont,
};
} // namespace
// Registers `g_font_info` with PDFium as its system font info, so that
// PDFium's font requests are routed to the callbacks defined in this file.
void InitializeWindowsFontMapper() {
FPDF_SetSystemFontInfo(&g_font_info);
}
// Returns a pointer to the file-level `g_font_info` callback table so that
// tests can invoke the callbacks directly. The pointee is a global, so the
// caller does not own it.
FPDF_SYSFONTINFO* GetSkiaFontMapperForTesting() {
return &g_font_info;
}
} // namespace chrome_pdf