color_gamma.c | Explore in Territory

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#include "dc.h"
#include "opp.h"
#include "color_gamma.h"

/* When calculating LUT values the first region and at least one subsequent
 * region are calculated with full precision. These defines are a demarcation
 * of where the second region starts and ends.
 * These are hardcoded values to avoid recalculating them in loops.
 */
#define PRECISE_LUT_REGION_START …
#define PRECISE_LUT_REGION_END …

static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];

// Hardcoded table that depends on setup_x_points_distribution and sdr_level=80
// If x points are changed, then PQ Y points will be misaligned and a new
// table would need to be generated. Or use old method that calls compute_pq.
// The last point is above PQ formula range (0-125 in normalized FP16)
// The value for the last point (128) is such that interpolation from
// 120 to 128 will give 1.0 for X = 125.0
// first couple points are 0 - HW LUT is mirrored around zero, so making first
// segment 0 to 0 will effectively clip it, and these are very low PQ codes
// min nonzero value below (216825) is a little under 12-bit PQ code 1.
static const unsigned long long pq_divider = …;
static const unsigned long long pq_numerator[MAX_HW_POINTS + 1] = …;

// these are helpers for calculations to reduce stack usage
// do not depend on these being preserved across calls

/* Helper to optimize gamma calculation, only use in translate_from_linear, in
 * particular the dc_fixpt_pow function which is very expensive
 * The idea is that our regions for X points are exponential and currently they all use
 * the same number of points (NUM_PTS_IN_REGION) and in each region every point
 * is exactly 2x the one at the same index in the previous region. In other words
 * X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16
 * The other fact is that (2x)^gamma = 2^gamma * x^gamma
 * So we compute and save x^gamma for the first 16 regions, and for every next region
 * just multiply with 2^gamma which can be computed once, and save the result so we
 * recursively compute all the values.
 */

/*
 * Regamma coefficients are used for both regamma and degamma. Degamma
 * coefficients are calculated in our formula using the regamma coefficients.
 */
									 /*sRGB     709     2.2 2.4 P3*/
static const int32_t numerator01[] = …;
static const int32_t numerator02[] = …;
static const int32_t numerator03[] = …;
static const int32_t numerator04[] = …;
static const int32_t numerator05[] = …;

/* one-time setup of X points */
void setup_x_points_distribution(void)
{ … }

void log_x_points_distribution(struct dal_logger *logger)
{ … }

static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
{ … }

static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
{ … }


/* de gamma, non-linear to linear */
static void compute_hlg_eotf(struct fixed31_32 in_x,
		struct fixed31_32 *out_y,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{ … }

/* re gamma, linear to non-linear */
static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{ … }


/* one-time pre-compute PQ values - only for sdr_white_level 80 */
void precompute_pq(void)
{ … }

/* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
void precompute_de_pq(void)
{ … }
struct dividers { … };


static bool build_coefficients(struct gamma_coefficients *coefficients,
		enum dc_transfer_func_predefined type)
{ … }

static struct fixed31_32 translate_from_linear_space(
		struct translate_from_linear_space_args *args)
{ … }


static struct fixed31_32 translate_from_linear_space_long(
		struct translate_from_linear_space_args *args)
{ … }

static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
{ … }


static struct fixed31_32 translate_to_linear_space(
	struct fixed31_32 arg,
	struct fixed31_32 a0,
	struct fixed31_32 a1,
	struct fixed31_32 a2,
	struct fixed31_32 a3,
	struct fixed31_32 gamma)
{ … }

static struct fixed31_32 translate_from_linear_space_ex(
	struct fixed31_32 arg,
	struct gamma_coefficients *coeff,
	uint32_t color_index,
	struct calculate_buffer *cal_buffer)
{ … }


static inline struct fixed31_32 translate_to_linear_space_ex(
	struct fixed31_32 arg,
	struct gamma_coefficients *coeff,
	uint32_t color_index)
{ … }


static bool find_software_points(
	const struct dc_gamma *ramp,
	const struct gamma_pixel *axis_x,
	struct fixed31_32 hw_point,
	enum channel_name channel,
	uint32_t *index_to_start,
	uint32_t *index_left,
	uint32_t *index_right,
	enum hw_point_position *pos)
{ … }

static bool build_custom_gamma_mapping_coefficients_worker(
	const struct dc_gamma *ramp,
	struct pixel_gamma_point *coeff,
	const struct hw_x_point *coordinates_x,
	const struct gamma_pixel *axis_x,
	enum channel_name channel,
	uint32_t number_of_points)
{ … }

static struct fixed31_32 calculate_mapped_value(
	struct pwl_float_data *rgb,
	const struct pixel_gamma_point *coeff,
	enum channel_name channel,
	uint32_t max_index)
{ … }

static void build_pq(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level)
{ … }

static void build_de_pq(struct pwl_float_data_ex *de_pq,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x)
{ … }

static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		enum dc_transfer_func_predefined type,
		struct calculate_buffer *cal_buffer)
{ … }

static void hermite_spline_eetf(struct fixed31_32 input_x,
				struct fixed31_32 max_display,
				struct fixed31_32 min_display,
				struct fixed31_32 max_content,
				struct fixed31_32 *out_x)
{ … }

static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		const struct hdr_tm_params *fs_params,
		struct calculate_buffer *cal_buffer)
{ … }

static bool build_degamma(struct pwl_float_data_ex *curve,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
{ … }





static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{ … }


static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
		uint32_t hw_points_num,
		const struct hw_x_point *coordinate_x,
		uint32_t sdr_white_level, uint32_t max_luminance_nits)
{ … }

static void scale_gamma(struct pwl_float_data *pwl_rgb,
		const struct dc_gamma *ramp,
		struct dividers dividers)
{ … }

static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
		const struct dc_gamma *ramp,
		struct dividers dividers)
{ … }

/* todo: all these scale_gamma functions are inherently the same but
 *  take different structures as params or different format for ramp
 *  values. We could probably implement it in a more generic fashion
 */
static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
		const struct regamma_ramp *ramp,
		struct dividers dividers)
{ … }

/*
 * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
 * Input is evenly distributed in the output color space as specified in
 * SetTimings
 *
 * Interpolation details:
 * 1D LUT has 4096 values which give curve correction in 0-1 float range
 * for evenly spaced points in 0-1 range. lut1D[index] gives correction
 * for index/4095.
 * First we find index for which:
 *	index/4095 < regamma_y < (index+1)/4095 =>
 *	index < 4095*regamma_y < index + 1
 * norm_y = 4095*regamma_y, and index is just truncating to nearest integer
 * lut1 = lut1D[index], lut2 = lut1D[index+1]
 *
 * adjustedY is then linearly interpolating regamma Y between lut1 and lut2
 *
 * Custom degamma on Linux uses the same interpolation math, so is handled here
 */
static void apply_lut_1d(
		const struct dc_gamma *ramp,
		uint32_t num_hw_points,
		struct dc_transfer_func_distributed_points *tf_pts)
{ … }

static void build_evenly_distributed_points(
	struct gamma_pixel *points,
	uint32_t numberof_points,
	struct dividers dividers)
{ … }

static inline void copy_rgb_regamma_to_coordinates_x(
		struct hw_x_point *coordinates_x,
		uint32_t hw_points_num,
		const struct pwl_float_data_ex *rgb_ex)
{ … }

static bool calculate_interpolated_hardware_curve(
	const struct dc_gamma *ramp,
	struct pixel_gamma_point *coeff128,
	struct pwl_float_data *rgb_user,
	const struct hw_x_point *coordinates_x,
	const struct gamma_pixel *axis_x,
	uint32_t number_of_points,
	struct dc_transfer_func_distributed_points *tf_pts)
{ … }

/* The "old" interpolation uses a complicated scheme to build an array of
 * coefficients while also using an array of 0-255 normalized to 0-1
 * Then there's another loop using both of the above + new scaled user ramp
 * and we concatenate them. It also searches for points of interpolation and
 * uses enums for positions.
 *
 * This function uses a different approach:
 * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
 * To find index for hwX , we notice the following:
 * i/255 <= hwX < (i+1)/255  <=> i <= 255*hwX < i+1
 * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
 *
 * Once the index is known, combined Y is simply:
 * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index)
 *
 * We should switch to this method in all cases, it's simpler and faster
 * ToDo one day - for now this only applies to ADL regamma to avoid regression
 * for regular use cases (sRGB and PQ)
 */
static void interpolate_user_regamma(uint32_t hw_points_num,
		struct pwl_float_data *rgb_user,
		bool apply_degamma,
		struct dc_transfer_func_distributed_points *tf_pts)
{ … }

static void build_new_custom_resulted_curve(
	uint32_t hw_points_num,
	struct dc_transfer_func_distributed_points *tf_pts)
{ … }

static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
		uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
{ … }

static bool map_regamma_hw_to_x_user(
	const struct dc_gamma *ramp,
	struct pixel_gamma_point *coeff128,
	struct pwl_float_data *rgb_user,
	struct hw_x_point *coords_x,
	const struct gamma_pixel *axis_x,
	const struct pwl_float_data_ex *rgb_regamma,
	uint32_t hw_points_num,
	struct dc_transfer_func_distributed_points *tf_pts,
	bool map_user_ramp,
	bool do_clamping)
{ … }

#define _EXTRA_POINTS …

bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
		const struct regamma_lut *regamma,
		struct calculate_buffer *cal_buffer,
		const struct dc_gamma *ramp)
{ … }

bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
		const struct regamma_lut *regamma,
		struct calculate_buffer *cal_buffer,
		const struct dc_gamma *ramp)
{ … }

bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
		struct dc_transfer_func *input_tf,
		const struct dc_gamma *ramp, bool map_user_ramp)
{ … }

static bool calculate_curve(enum dc_transfer_func_predefined trans,
				struct dc_transfer_func_distributed_points *points,
				struct pwl_float_data_ex *rgb_regamma,
				const struct hdr_tm_params *fs_params,
				uint32_t sdr_ref_white_level,
				struct calculate_buffer *cal_buffer)
{ … }

bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
					const struct dc_gamma *ramp,
					bool map_user_ramp,
					bool can_rom_be_used,
					const struct hdr_tm_params *fs_params,
					struct calculate_buffer *cal_buffer)
{ … }
linux/drivers/gpu/drm/amd/display/modules/color/color_gamma.c