chromium/pdf/pdf.h

// Copyright 2010 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PDF_PDF_H_
#define PDF_PDF_H_

#include <optional>
#include <vector>

#include "base/containers/span.h"
#include "base/values.h"
#include "build/build_config.h"
#include "pdf/document_metadata.h"
#include "services/screen_ai/buildflags/buildflags.h"

#if BUILDFLAG(IS_CHROMEOS)
#include "pdf/flatten_pdf_result.h"
#endif

#if BUILDFLAG(IS_WIN)
#include <windows.h>
#endif

#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
#include <memory>

#include "base/functional/callback_forward.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/skia/include/core/SkBitmap.h"
#endif  // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)

namespace gfx {
class Rect;
class Size;
class SizeF;
}  // namespace gfx

namespace chrome_pdf {

#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
class PdfProgressiveSearchifier;
#endif  // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)

void SetUseSkiaRendererPolicy(bool use_skia);

#if BUILDFLAG(IS_CHROMEOS)
// Create a flattened PDF document from an existing PDF document.
// `input_buffer` is the buffer that contains the entire PDF document to be
// flattened.
std::optional<FlattenPdfResult> CreateFlattenedPdf(
    base::span<const uint8_t> input_buffer);
#endif  // BUILDFLAG(IS_CHROMEOS)

#if BUILDFLAG(IS_WIN)
// Printing modes - type to convert PDF to for printing. See PDFium's
// FPDF_SetPrintMode() for details.
enum PrintingMode {
  kEmf = 0,
  kTextOnly = 1,
  kPostScript2 = 2,
  kPostScript3 = 3,
  // Values 4 and 5 are similar to `kPostScript2` and `kPostScript3`, but are
  // not intended for use in sandboxed environments like Chromium's.
  kEmfWithReducedRasterization = 6,
  kPostScript3WithType42Fonts = 7,
  // Value 8 is similar to `kPostScript3WithType42Fonts`, but is not intended
  // for use in sandboxed environments like Chromium's.
};

// `pdf_buffer` is the buffer that contains the entire PDF document to be
//     rendered.
// `page_index` is the 0-based index of the page to be rendered.
// `dc` is the device context to render into.
// `dpi_x` and `dpi_y` is the resolution.
// `bounds_origin_x`, `bounds_origin_y`, `bounds_width` and `bounds_height`
//     specify a bounds rectangle within the DC in which to render the PDF
//     page.
// `fit_to_bounds` specifies whether the output should be shrunk to fit the
//     supplied bounds if the page size is larger than the bounds in any
//     dimension. If this is false, parts of the PDF page that lie outside
//     the bounds will be clipped.
// `stretch_to_bounds` specifies whether the output should be stretched to fit
//     the supplied bounds if the page size is smaller than the bounds in any
//     dimension.
// If both `fit_to_bounds` and `stretch_to_bounds` are true, then
//     `fit_to_bounds` is honored first.
// `keep_aspect_ratio` If any scaling is to be done is true, this flag
//     specifies whether the original aspect ratio of the page should be
//     preserved while scaling.
// `center_in_bounds` specifies whether the final image (after any scaling is
//     done) should be centered within the given bounds.
// `autorotate` specifies whether the final image should be rotated to match
//     the output bound.
// `use_color` specifies color or grayscale.
// Returns false if the document or the page number are not valid.
bool RenderPDFPageToDC(base::span<const uint8_t> pdf_buffer,
                       int page_index,
                       HDC dc,
                       int dpi_x,
                       int dpi_y,
                       int bounds_origin_x,
                       int bounds_origin_y,
                       int bounds_width,
                       int bounds_height,
                       bool fit_to_bounds,
                       bool stretch_to_bounds,
                       bool keep_aspect_ratio,
                       bool center_in_bounds,
                       bool autorotate,
                       bool use_color);

void SetPDFUsePrintMode(int mode);
#endif  // BUILDFLAG(IS_WIN)

// `page_count` and `max_page_width` are optional and can be NULL.
// Returns false if the document is not valid.
bool GetPDFDocInfo(base::span<const uint8_t> pdf_buffer,
                   int* page_count,
                   float* max_page_width);

// Gets the PDF document metadata (see section 14.3.3 "Document Information
// Dictionary" of the ISO 32000-1:2008 spec).
std::optional<DocumentMetadata> GetPDFDocMetadata(
    base::span<const uint8_t> pdf_buffer);

// Whether the PDF is Tagged (see ISO 32000-1:2008 14.8 "Tagged PDF").
// Returns true if it's a tagged (accessible) PDF, false if it's a valid
// PDF but untagged, and nullopt if the PDF can't be parsed.
std::optional<bool> IsPDFDocTagged(base::span<const uint8_t> pdf_buffer);

// Given a tagged PDF (see IsPDFDocTagged, above), return the portion of
// the structure tree for a given page as a hierarchical tree of base::Values.
base::Value GetPDFStructTreeForPage(base::span<const uint8_t> pdf_buffer,
                                    int page_index);

// Whether the PDF has a Document Outline (see ISO 32000-1:2008 12.3.3 "Document
// Outline"). Returns true if the PDF has an outline, false if it's a valid PDF
// without an outline, and nullopt if the PDF can't be parsed.
std::optional<bool> PDFDocHasOutline(base::span<const uint8_t> pdf_buffer);

// Gets the dimensions of a specific page in a document.
// `pdf_buffer` is the buffer that contains the entire PDF document to be
//     rendered.
// `page_index` is the page number that the function will get the dimensions of.
// Returns the size of the page in points, or nullopt if the document or the
// page number are not valid.
std::optional<gfx::SizeF> GetPDFPageSizeByIndex(
    base::span<const uint8_t> pdf_buffer,
    int page_index);

enum class RenderDeviceType {};

struct RenderOptions {};

// Renders PDF page into 4-byte per pixel BGRA color bitmap.
// `pdf_buffer` is the buffer that contains the entire PDF document to be
//     rendered.
// `page_index` is the 0-based index of the page to be rendered.
// `bitmap_buffer` is the output buffer for bitmap.
// `bitmap_size` is the size of the output bitmap.
// `dpi` is the 2D resolution.
// `options` is the options to render with.
// Returns false if the document or the page number are not valid.
bool RenderPDFPageToBitmap(base::span<const uint8_t> pdf_buffer,
                           int page_index,
                           void* bitmap_buffer,
                           const gfx::Size& bitmap_size,
                           const gfx::Size& dpi,
                           const RenderOptions& options);

// Convert multiple PDF pages into a N-up PDF.
// `input_buffers` is the vector of buffers with each buffer contains a PDF.
//     If any of the PDFs contains multiple pages, only the first page of the
//     document is used.
// `pages_per_sheet` is the number of pages to put on one sheet.
// `page_size` is the output page size, measured in PDF "user space" units.
// `printable_area` is the output page printable area, measured in PDF
//     "user space" units.  Should be smaller than `page_size`.
//
// `page_size` is the print media size.  The page size of the output N-up PDF is
// determined by the `pages_per_sheet`, the orientation of the PDF pages
// contained in the `input_buffers`, and the media page size `page_size`. For
// example, when `page_size` = 512x792, `pages_per_sheet` = 2, and the
// orientation of `input_buffers` = portrait, the output N-up PDF will be
// 792x512.
// See printing::NupParameters for more details on how the output page
// orientation is determined, to understand why `page_size` may be swapped in
// some cases.
std::vector<uint8_t> ConvertPdfPagesToNupPdf(
    std::vector<base::span<const uint8_t>> input_buffers,
    size_t pages_per_sheet,
    const gfx::Size& page_size,
    const gfx::Rect& printable_area);

// Convert a PDF document to a N-up PDF document.
// `input_buffer` is the buffer that contains the entire PDF document to be
//     converted to a N-up PDF document.
// `pages_per_sheet` is the number of pages to put on one sheet.
// `page_size` is the output page size, measured in PDF "user space" units.
// `printable_area` is the output page printable area, measured in PDF
//     "user space" units.  Should be smaller than `page_size`.
//
// Refer to the description of ConvertPdfPagesToNupPdf to understand how the
// output page size will be calculated.
// The algorithm used to determine the output page size is the same.
std::vector<uint8_t> ConvertPdfDocumentToNupPdf(
    base::span<const uint8_t> input_buffer,
    size_t pages_per_sheet,
    const gfx::Size& page_size,
    const gfx::Rect& printable_area);

#if BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)
// Converts an inaccessible PDF to a searchable PDF.
// `pdf_buffer` is the buffer of the inaccessible PDF.
// `perform_ocr_callback` is the callback that takes an image and outputs
//     the OCR result. It may be called multiple times.
//
// The conversion is done by performing OCR on each image in the PDF and adding
// a layer of invisible text to the PDF to make text on images accessible. Each
// execution should take place in an isolated process, and each process should
// be terminated upon completion of the conversion. An empty vector is returned
// on failure.
std::vector<uint8_t> Searchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback);

// Creates a PDF searchifier for future operations, such as adding and deleting
// pages, and saving PDFs. Crashes if failed to create.
std::unique_ptr<PdfProgressiveSearchifier> CreateProgressiveSearchifier();
#endif  // BUILDFLAG(ENABLE_SCREEN_AI_SERVICE)

}  // namespace chrome_pdf

#endif  // PDF_PDF_H_