cmsopt.c | Explore in Territory

//---------------------------------------------------------------------------------
//
//  Little Color Management System
//  Copyright (c) 1998-2023 Marti Maria Saguer
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//---------------------------------------------------------------------------------
//

#include "lcms2_internal.h"


//----------------------------------------------------------------------------------

// Optimization for 8 bits, Shaper-CLUT (3 inputs only)
Prelin8Data;


// Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs)
Prelin16Data;


// Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed

cmsS1Fixed14Number;   // Note that this may hold more than 16 bits!

#define DOUBLE_TO_1FIXED14(x) …

MatShaper8Data;

// Curves, optimization is shared between 8 and 16 bits
Curves16Data;

// A simple adapter to prevent _cmsPipelineEval16Fn vs. _cmsInterpFn16
// confusion, which trips up UBSAN.
static
void Lerp16Adapter(CMSREGISTER const cmsUInt16Number in[],
                   CMSREGISTER cmsUInt16Number out[],
                   const void* data) { … }

// Simple optimizations ----------------------------------------------------------------------------------------------------------


// Clamp a fixed point integer to signed 28 bits to avoid overflow in
// calculations.  Clamp is intended for use with colorants, requiring one bit
// for a colorant and another two bits to avoid overflow when combining the
// colors.
cmsINLINE cmsS1Fixed14Number _FixedClamp(cmsS1Fixed14Number n) { … }

// Perform one row of matrix multiply with translation for MatShaperEval16().
cmsINLINE cmsInt64Number _MatShaperEvaluateRow(cmsS1Fixed14Number* mat,
                                               cmsS1Fixed14Number off,
                                               cmsS1Fixed14Number r,
                                               cmsS1Fixed14Number g,
                                               cmsS1Fixed14Number b) { … }

// Remove an element in linked chain
static
void _RemoveElement(cmsStage** head)
{ … }

// Remove all identities in chain. Note that pt actually is a double pointer to the element that holds the pointer.
static
cmsBool _Remove1Op(cmsPipeline* Lut, cmsStageSignature UnaryOp)
{ … }

// Same, but only if two adjacent elements are found
static
cmsBool _Remove2Op(cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2)
{ … }


static
cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
{ … }

static
cmsBool  isFloatMatrixIdentity(const cmsMAT3* a)
{ … }
// if two adjacent matrices are found, multiply them. 
static
cmsBool _MultiplyMatrix(cmsPipeline* Lut)
{ … }


// Preoptimize just gets rif of no-ops coming paired. Conversion from v2 to v4 followed
// by a v4 to v2 and vice-versa. The elements are then discarded.
static
cmsBool PreOptimize(cmsPipeline* Lut)
{ … }

static
void Eval16nop1D(CMSREGISTER const cmsUInt16Number Input[],
                 CMSREGISTER cmsUInt16Number Output[],
                 CMSREGISTER const struct _cms_interp_struc* p)
{ … }

static
void PrelinEval16(CMSREGISTER const cmsUInt16Number Input[],
                  CMSREGISTER cmsUInt16Number Output[],
                  CMSREGISTER const void* D)
{ … }


static
void PrelinOpt16free(cmsContext ContextID, void* ptr)
{ … }

static
void* Prelin16dup(cmsContext ContextID, const void* ptr)
{ … }


static
Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
                               const cmsInterpParams* ColorMap,
                               cmsUInt32Number nInputs, cmsToneCurve** In,
                               cmsUInt32Number nOutputs, cmsToneCurve** Out )
{ … }



// Resampling ---------------------------------------------------------------------------------

#define PRELINEARIZATION_POINTS …

// Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for
// almost any transform. We use floating point precision and then convert from floating point to 16 bits.
static
cmsInt32Number XFormSampler16(CMSREGISTER const cmsUInt16Number In[], 
                              CMSREGISTER cmsUInt16Number Out[], 
                              CMSREGISTER void* Cargo)
{ … }

// Try to see if the curves of a given MPE are linear
static
cmsBool AllCurvesAreLinear(cmsStage* mpe)
{ … }

// This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose
// is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels
static
cmsBool  PatchLUT(cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[],
                  cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn)
{ … }

// Auxiliary, to see if two values are equal or very different
static
cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] )
{ … }


// Locate the node for the white point and fix it to pure white in order to avoid scum dot.
static
cmsBool FixWhiteMisalignment(cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace)
{ … }

// -----------------------------------------------------------------------------------------------------------------------------------------------
// This function creates simple LUT from complex ones. The generated LUT has an optional set of
// prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables.
// These curves have to exist in the original LUT in order to be used in the simplified output.
// Caller may also use the flags to allow this feature.
// LUTS with all curves will be simplified to a single curve. Parametric curves are lost.
// This function should be used on 16-bits LUTS only, as floating point losses precision when simplified
// -----------------------------------------------------------------------------------------------------------------------------------------------

static
cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
{ … }


// -----------------------------------------------------------------------------------------------------------------------------------------------
// Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on
// Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works
// for RGB transforms. See the paper for more details
// -----------------------------------------------------------------------------------------------------------------------------------------------


// Normalize endpoints by slope limiting max and min. This assures endpoints as well.
// Descending curves are handled as well.
static
void SlopeLimiting(cmsToneCurve* g)
{ … }


// Precomputes tables for 8-bit on input devicelink.
static
Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3])
{ … }

static
void Prelin8free(cmsContext ContextID, void* ptr)
{ … }

static
void* Prelin8dup(cmsContext ContextID, const void* ptr)
{ … }



// A optimized interpolation for 8-bit input.
#define DENS …
static CMS_NO_SANITIZE
void PrelinEval8(CMSREGISTER const cmsUInt16Number Input[],
                 CMSREGISTER cmsUInt16Number Output[],
                 CMSREGISTER const void* D)
{ … }

#undef DENS


// Curves that contain wide empty areas are not optimizeable
static
cmsBool IsDegenerated(const cmsToneCurve* g)
{ … }

// --------------------------------------------------------------------------------------------------------------
// We need xput over here

static
cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
{ … }


// Curves optimizer ------------------------------------------------------------------------------------------------------------------

static
void CurvesFree(cmsContext ContextID, void* ptr)
{ … }

static
void* CurvesDup(cmsContext ContextID, const void* ptr)
{ … }

// Precomputes tables for 8-bit on input devicelink.
static
Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G)
{ … }

static
void FastEvaluateCurves8(CMSREGISTER const cmsUInt16Number In[],
                         CMSREGISTER cmsUInt16Number Out[],
                         CMSREGISTER const void* D)
{ … }


static
void FastEvaluateCurves16(CMSREGISTER const cmsUInt16Number In[],
                          CMSREGISTER cmsUInt16Number Out[],
                          CMSREGISTER const void* D)
{ … }


static
void FastIdentity16(CMSREGISTER const cmsUInt16Number In[],
                    CMSREGISTER cmsUInt16Number Out[],
                    CMSREGISTER const void* D)
{ … }


// If the target LUT holds only curves, the optimization procedure is to join all those
// curves together. That only works on curves and does not work on matrices.
static
cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
{ … }

// -------------------------------------------------------------------------------------------------------------------------------------
// LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles


static
void  FreeMatShaper(cmsContext ContextID, void* Data)
{ … }

static
void* DupMatShaper(cmsContext ContextID, const void* Data)
{ … }


// A fast matrix-shaper evaluator for 8 bits. This is a bit tricky since I'm using 1.14 signed fixed point
// to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits,
// in total about 50K, and the performance boost is huge!
static CMS_NO_SANITIZE
void MatShaperEval16(CMSREGISTER const cmsUInt16Number In[],
                     CMSREGISTER cmsUInt16Number Out[],
                     CMSREGISTER const void* D)
{ … }

// This table converts from 8 bits to 1.14 after applying the curve
static
void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
{ … }

// This table converts form 1.14 (being 0x4000 the last entry) to 8 bits after applying the curve
static
void FillSecondShaper(cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput)
{ … }

// Compute the matrix-shaper structure
static
cmsBool SetMatShaper(cmsPipeline* Dest, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3], cmsUInt32Number* OutputFormat)
{ … }

//  8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!
static
cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
{ … }


// -------------------------------------------------------------------------------------------------------------------------------------
// Optimization plug-ins

// List of optimizations
_cmsOptimizationCollection;


// The built-in list. We currently implement 4 types of optimizations. Joining of curves, matrix-shaper, linearization and resampling
static _cmsOptimizationCollection DefaultOptimization[] = …;

// The linked list head
_cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = …;


// Duplicates the zone of memory used by the plug-in in the new context
static
void DupPluginOptimizationList(struct _cmsContext_struct* ctx, 
                               const struct _cmsContext_struct* src)
{ … }

void  _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx, 
                                         const struct _cmsContext_struct* src)
{ … }


// Register new ways to optimize
cmsBool  _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data)
{ … }

// The entry point for LUT optimization
cmsBool CMSEXPORT _cmsOptimizePipeline(cmsContext ContextID,
                             cmsPipeline**    PtrLut,
                             cmsUInt32Number  Intent,
                             cmsUInt32Number* InputFormat,
                             cmsUInt32Number* OutputFormat,
                             cmsUInt32Number* dwFlags)
{ … }
chromium/third_party/pdfium/third_party/lcms/src/cmsopt.c